This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add variant_under_utf8_count() core function
[perl5.git] / inline.h
CommitLineData
25468daa
FC
1/* inline.h
2 *
3 * Copyright (C) 2012 by Larry Wall and others
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
7 *
8 * This file is a home for static inline functions that cannot go in other
9 * headers files, because they depend on proto.h (included after most other
10 * headers) or struct definitions.
11 *
12 * Each section names the header file that the functions "belong" to.
13 */
27669aa4 14
be3a7a5d
KW
15/* ------------------------------- av.h ------------------------------- */
16
c70927a6 17PERL_STATIC_INLINE SSize_t
be3a7a5d
KW
18S_av_top_index(pTHX_ AV *av)
19{
20 PERL_ARGS_ASSERT_AV_TOP_INDEX;
21 assert(SvTYPE(av) == SVt_PVAV);
22
23 return AvFILL(av);
24}
25
1afe1db1
FC
26/* ------------------------------- cv.h ------------------------------- */
27
ae77754a
FC
28PERL_STATIC_INLINE GV *
29S_CvGV(pTHX_ CV *sv)
30{
31 return CvNAMED(sv)
32 ? Perl_cvgv_from_hek(aTHX_ sv)
33 : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
34}
35
1afe1db1
FC
36PERL_STATIC_INLINE I32 *
37S_CvDEPTHp(const CV * const sv)
38{
39 assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
8de47657 40 return &((XPVCV*)SvANY(sv))->xcv_depth;
1afe1db1
FC
41}
42
d16269d8
PM
43/*
44 CvPROTO returns the prototype as stored, which is not necessarily what
45 the interpreter should be using. Specifically, the interpreter assumes
46 that spaces have been stripped, which has been the case if the prototype
47 was added by toke.c, but is generally not the case if it was added elsewhere.
48 Since we can't enforce the spacelessness at assignment time, this routine
49 provides a temporary copy at parse time with spaces removed.
50 I<orig> is the start of the original buffer, I<len> is the length of the
51 prototype and will be updated when this returns.
52 */
53
5b67adb8 54#ifdef PERL_CORE
d16269d8
PM
55PERL_STATIC_INLINE char *
56S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
57{
58 SV * tmpsv;
59 char * tmps;
60 tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
61 tmps = SvPVX(tmpsv);
62 while ((*len)--) {
63 if (!isSPACE(*orig))
64 *tmps++ = *orig;
65 orig++;
66 }
67 *tmps = '\0';
68 *len = tmps - SvPVX(tmpsv);
69 return SvPVX(tmpsv);
70}
5b67adb8 71#endif
d16269d8 72
25fdce4a
FC
73/* ------------------------------- mg.h ------------------------------- */
74
75#if defined(PERL_CORE) || defined(PERL_EXT)
76/* assumes get-magic and stringification have already occurred */
77PERL_STATIC_INLINE STRLEN
78S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
79{
80 assert(mg->mg_type == PERL_MAGIC_regex_global);
81 assert(mg->mg_len != -1);
82 if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
83 return (STRLEN)mg->mg_len;
84 else {
85 const STRLEN pos = (STRLEN)mg->mg_len;
86 /* Without this check, we may read past the end of the buffer: */
87 if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
88 return sv_or_pv_pos_u2b(sv, s, pos, NULL);
89 }
90}
91#endif
92
03414f05
FC
93/* ------------------------------- pad.h ------------------------------ */
94
95#if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
96PERL_STATIC_INLINE bool
97PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
98{
99 /* is seq within the range _LOW to _HIGH ?
100 * This is complicated by the fact that PL_cop_seqmax
101 * may have wrapped around at some point */
102 if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
103 return FALSE; /* not yet introduced */
104
105 if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
106 /* in compiling scope */
107 if (
108 (seq > COP_SEQ_RANGE_LOW(pn))
109 ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
110 : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
111 )
112 return TRUE;
113 }
114 else if (
115 (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
116 ?
117 ( seq > COP_SEQ_RANGE_LOW(pn)
118 || seq <= COP_SEQ_RANGE_HIGH(pn))
119
120 : ( seq > COP_SEQ_RANGE_LOW(pn)
121 && seq <= COP_SEQ_RANGE_HIGH(pn))
122 )
123 return TRUE;
124 return FALSE;
125}
126#endif
127
33a4312b
FC
128/* ------------------------------- pp.h ------------------------------- */
129
130PERL_STATIC_INLINE I32
131S_TOPMARK(pTHX)
132{
133 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 134 "MARK top %p %" IVdf "\n",
33a4312b
FC
135 PL_markstack_ptr,
136 (IV)*PL_markstack_ptr)));
137 return *PL_markstack_ptr;
138}
139
140PERL_STATIC_INLINE I32
141S_POPMARK(pTHX)
142{
143 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 144 "MARK pop %p %" IVdf "\n",
33a4312b
FC
145 (PL_markstack_ptr-1),
146 (IV)*(PL_markstack_ptr-1))));
147 assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
148 return *PL_markstack_ptr--;
149}
150
8d919b0a
FC
151/* ----------------------------- regexp.h ----------------------------- */
152
153PERL_STATIC_INLINE struct regexp *
154S_ReANY(const REGEXP * const re)
155{
df6b4bd5 156 XPV* const p = (XPV*)SvANY(re);
8d919b0a 157 assert(isREGEXP(re));
df6b4bd5
DM
158 return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
159 : (struct regexp *)p;
8d919b0a
FC
160}
161
27669aa4
FC
162/* ------------------------------- sv.h ------------------------------- */
163
164PERL_STATIC_INLINE SV *
165S_SvREFCNT_inc(SV *sv)
166{
2439e033 167 if (LIKELY(sv != NULL))
27669aa4
FC
168 SvREFCNT(sv)++;
169 return sv;
170}
171PERL_STATIC_INLINE SV *
172S_SvREFCNT_inc_NN(SV *sv)
173{
174 SvREFCNT(sv)++;
175 return sv;
176}
177PERL_STATIC_INLINE void
178S_SvREFCNT_inc_void(SV *sv)
179{
2439e033 180 if (LIKELY(sv != NULL))
27669aa4
FC
181 SvREFCNT(sv)++;
182}
75e16a44
FC
183PERL_STATIC_INLINE void
184S_SvREFCNT_dec(pTHX_ SV *sv)
185{
2439e033 186 if (LIKELY(sv != NULL)) {
75a9bf96 187 U32 rc = SvREFCNT(sv);
79e2a32a 188 if (LIKELY(rc > 1))
75a9bf96
DM
189 SvREFCNT(sv) = rc - 1;
190 else
191 Perl_sv_free2(aTHX_ sv, rc);
75e16a44
FC
192 }
193}
541377b1
FC
194
195PERL_STATIC_INLINE void
4a9a56a7
DM
196S_SvREFCNT_dec_NN(pTHX_ SV *sv)
197{
198 U32 rc = SvREFCNT(sv);
79e2a32a 199 if (LIKELY(rc > 1))
4a9a56a7
DM
200 SvREFCNT(sv) = rc - 1;
201 else
202 Perl_sv_free2(aTHX_ sv, rc);
203}
204
205PERL_STATIC_INLINE void
541377b1
FC
206SvAMAGIC_on(SV *sv)
207{
208 assert(SvROK(sv));
209 if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
210}
211PERL_STATIC_INLINE void
212SvAMAGIC_off(SV *sv)
213{
214 if (SvROK(sv) && SvOBJECT(SvRV(sv)))
215 HvAMAGIC_off(SvSTASH(SvRV(sv)));
216}
217
218PERL_STATIC_INLINE U32
541377b1
FC
219S_SvPADSTALE_on(SV *sv)
220{
c0683843 221 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
222 return SvFLAGS(sv) |= SVs_PADSTALE;
223}
224PERL_STATIC_INLINE U32
225S_SvPADSTALE_off(SV *sv)
226{
c0683843 227 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
228 return SvFLAGS(sv) &= ~SVs_PADSTALE;
229}
25fdce4a 230#if defined(PERL_CORE) || defined (PERL_EXT)
4ddea69a 231PERL_STATIC_INLINE STRLEN
6964422a 232S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
4ddea69a 233{
25fdce4a 234 PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
4ddea69a
FC
235 if (SvGAMAGIC(sv)) {
236 U8 *hopped = utf8_hop((U8 *)pv, pos);
237 if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
238 return (STRLEN)(hopped - (U8 *)pv);
239 }
240 return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
241}
242#endif
f019c49e 243
d1decf2b
TC
244/* ------------------------------- handy.h ------------------------------- */
245
246/* saves machine code for a common noreturn idiom typically used in Newx*() */
c1d6452f 247#ifdef GCC_DIAG_PRAGMA
6ab56f1e 248GCC_DIAG_IGNORE(-Wunused-function) /* Intentionally left semicolonless. */
c1d6452f 249#endif
d1decf2b
TC
250static void
251S_croak_memory_wrap(void)
252{
253 Perl_croak_nocontext("%s",PL_memory_wrap);
254}
c1d6452f 255#ifdef GCC_DIAG_PRAGMA
6ab56f1e 256GCC_DIAG_RESTORE /* Intentionally left semicolonless. */
c1d6452f 257#endif
d1decf2b 258
a8a2ceaa
KW
259/* ------------------------------- utf8.h ------------------------------- */
260
2fe720e2
KW
261/*
262=head1 Unicode Support
263*/
264
55d09dc8
KW
265PERL_STATIC_INLINE void
266S_append_utf8_from_native_byte(const U8 byte, U8** dest)
267{
268 /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
269 * encoded string at '*dest', updating '*dest' to include it */
270
55d09dc8
KW
271 PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
272
6f2d5cbc 273 if (NATIVE_BYTE_IS_INVARIANT(byte))
a09ec51a 274 *((*dest)++) = byte;
55d09dc8 275 else {
a09ec51a
KW
276 *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
277 *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
55d09dc8
KW
278 }
279}
280
e123187a 281/*
2fe720e2 282=for apidoc valid_utf8_to_uvchr
2717076a 283Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
2fe720e2
KW
284the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
285it passes C<L</isUTF8_CHAR>>. Surrogates, non-character code points, and
286non-Unicode code points are allowed.
287
288=cut
289
290 */
291
292PERL_STATIC_INLINE UV
293Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
294{
c41b2540 295 const UV expectlen = UTF8SKIP(s);
2fe720e2
KW
296 const U8* send = s + expectlen;
297 UV uv = *s;
298
299 PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
300
301 if (retlen) {
302 *retlen = expectlen;
303 }
304
305 /* An invariant is trivially returned */
306 if (expectlen == 1) {
307 return uv;
308 }
309
310 /* Remove the leading bits that indicate the number of bytes, leaving just
311 * the bits that are part of the value */
312 uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
313
314 /* Now, loop through the remaining bytes, accumulating each into the
315 * working total as we go. (I khw tried unrolling the loop for up to 4
316 * bytes, but there was no performance improvement) */
317 for (++s; s < send; s++) {
318 uv = UTF8_ACCUMULATE(uv, *s);
319 }
320
321 return UNI_TO_NATIVE(uv);
322
323}
324
1e599354
KW
325/*
326=for apidoc is_utf8_invariant_string
327
82c5d941 328Returns TRUE if the first C<len> bytes of the string C<s> are the same
1e599354 329regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
82c5d941
KW
330EBCDIC machines); otherwise it returns FALSE. That is, it returns TRUE if they
331are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and only
332the ASCII characters fit this definition. On EBCDIC machines, the ASCII-range
333characters are invariant, but so also are the C1 controls.
1e599354
KW
334
335If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
336use this option, that C<s> can't have embedded C<NUL> characters and has to
337have a terminating C<NUL> byte).
338
9f2abfde
KW
339See also
340C<L</is_utf8_string>>,
341C<L</is_utf8_string_flags>>,
342C<L</is_utf8_string_loc>>,
343C<L</is_utf8_string_loc_flags>>,
344C<L</is_utf8_string_loclen>>,
345C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
346C<L</is_utf8_fixed_width_buf_flags>>,
347C<L</is_utf8_fixed_width_buf_loc_flags>>,
348C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
349C<L</is_strict_utf8_string>>,
350C<L</is_strict_utf8_string_loc>>,
351C<L</is_strict_utf8_string_loclen>>,
352C<L</is_c9strict_utf8_string>>,
353C<L</is_c9strict_utf8_string_loc>>,
354and
355C<L</is_c9strict_utf8_string_loclen>>.
1e599354
KW
356
357=cut
0cbf5865
KW
358
359*/
360
361#define is_utf8_invariant_string(s, len) \
362 is_utf8_invariant_string_loc(s, len, NULL)
363
364/*
365=for apidoc is_utf8_invariant_string_loc
366
367Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
368the first UTF-8 variant character in the C<ep> pointer; if all characters are
369UTF-8 invariant, this function does not change the contents of C<*ep>.
370
371=cut
372
1e599354
KW
373*/
374
375PERL_STATIC_INLINE bool
e17544a6 376S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
1e599354 377{
e17544a6 378 const U8* send;
1e599354
KW
379 const U8* x = s;
380
0cbf5865
KW
381 PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
382
e17544a6
KW
383 if (len == 0) {
384 len = strlen((const char *)s);
385 }
386
387 send = s + len;
388
389#ifndef EBCDIC
4ab2fd9b
KW
390
391/* This looks like 0x010101... */
392#define PERL_COUNT_MULTIPLIER (~ (UINTMAX_C(0)) / 0xFF)
393
394/* This looks like 0x808080... */
395#define PERL_VARIANTS_WORD_MASK (PERL_COUNT_MULTIPLIER * 0x80)
396#define PERL_WORDSIZE sizeof(PERL_COUNT_MULTIPLIER)
397#define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1)
e17544a6 398
099e59a4
KW
399/* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1, by
400 * or'ing together the lowest bits of 'x'. Hopefully the final term gets
401 * optimized out completely on a 32-bit system, and its mask gets optimized out
402 * on a 64-bit system */
403#define PERL_IS_SUBWORD_ADDR(x) (1 & ( PTR2nat(x) \
404 | (PTR2nat(x) >> 1) \
405 | ( (PTR2nat(x) >> 2) \
406 & PERL_WORD_BOUNDARY_MASK)))
407
408 /* Do the word-at-a-time iff there is at least one usable full word. That
409 * means that after advancing to a word boundary, there still is at least a
410 * full word left. The number of bytes needed to advance is 'wordsize -
411 * offset' unless offset is 0. */
412 if ((STRLEN) (send - x) >= PERL_WORDSIZE
413
414 /* This term is wordsize if subword; 0 if not */
415 + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
416
417 /* 'offset' */
418 - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
419 {
b40579ff 420
46bb68f6
KW
421 /* Process per-byte until reach word boundary. XXX This loop could be
422 * eliminated if we knew that this platform had fast unaligned reads */
b40579ff 423 while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
46bb68f6
KW
424 if (! UTF8_IS_INVARIANT(*x)) {
425 if (ep) {
426 *ep = x;
427 }
e17544a6 428
46bb68f6
KW
429 return FALSE;
430 }
431 x++;
e17544a6 432 }
e17544a6 433
099e59a4
KW
434 /* Here, we know we have at least one full word to process. Process
435 * per-word as long as we have at least a full word left */
436 do {
4ab2fd9b 437 if ((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) {
e17544a6 438
46bb68f6
KW
439 /* Found a variant. Just return if caller doesn't want its
440 * exact position */
441 if (! ep) {
442 return FALSE;
443 }
e17544a6 444
46bb68f6
KW
445 /* Otherwise fall into final loop to find which byte it is */
446 break;
447 }
448 x += PERL_WORDSIZE;
099e59a4 449 } while (x + PERL_WORDSIZE <= send);
b40579ff 450 }
e17544a6 451
e17544a6
KW
452#endif
453
454 /* Process per-byte */
455 while (x < send) {
456 if (! UTF8_IS_INVARIANT(*x)) {
457 if (ep) {
458 *ep = x;
459 }
0cbf5865 460
e17544a6 461 return FALSE;
0cbf5865 462 }
1e599354 463
e17544a6 464 x++;
1e599354
KW
465 }
466
467 return TRUE;
468}
469
03c1e4ab
KW
470#if defined(PERL_CORE) || defined(PERL_EXT)
471
472/*
473=for apidoc variant_under_utf8_count
474
475This function looks at the sequence of bytes between C<s> and C<e>, which are
476assumed to be encoded in ASCII/Latin1, and returns how many of them would
477change should the string be translated into UTF-8. Due to the nature of UTF-8,
478each of these would occupy two bytes instead of the single one in the input
479string. Thus, this function returns the precise number of bytes the string
480would expand by when translated to UTF-8.
481
482Unlike most of the other functions that have C<utf8> in their name, the input
483to this function is NOT a UTF-8-encoded string. The function name is slightly
484I<odd> to emphasize this.
485
486This function is internal to Perl because khw thinks that any XS code that
487would want this is probably operating too close to the internals. Presenting a
488valid use case could change that.
489
490See also
491C<L<perlapi/is_utf8_invariant_string>>
492and
493C<L<perlapi/is_utf8_invariant_string_loc>>,
494
495=cut
496
497*/
498
499PERL_STATIC_INLINE Size_t
500S_variant_under_utf8_count(const U8* const s, const U8* const e)
501{
502 const U8* x = s;
503 Size_t count = 0;
504
505 PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT;
506
507# ifndef EBCDIC
508
509 if ((STRLEN) (e - x) >= PERL_WORDSIZE
510 + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
511 - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
512 {
513
514 /* Process per-byte until reach word boundary. XXX This loop could be
515 * eliminated if we knew that this platform had fast unaligned reads */
516 while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
517 count += ! UTF8_IS_INVARIANT(*x++);
518 }
519
520 /* Process per-word as long as we have at least a full word left */
521 do {
522
523 /* It's easier to look at a 16-bit word size to see how this works.
524 * The expression would be:
525 *
526 * (((*x & 0x8080) >> 7) * 0x0101) >> 8;
527 *
528 * Suppose the value of *x is the 16 bits
529 *
530 * 0by_______z_______
531 *
532 * where the 14 bits represented by '_' could be any combination of
533 * 0's or 1's (we don't care), and 'y' is the high bit of one byte,
534 * and 'z' is the high bit for the other (endianness doesn't
535 * matter). On ASCII platforms a byte is variant if the high bit
536 * is set; invariant otherwise. Thus, our goal, the count of
537 * variants in this 2-byte word is
538 *
539 * y + z
540 *
541 * To turn 0by_______z_______ into (y + z) we mask the intial value
542 * with 0x8080 to turn it into
543 *
544 * 0by0000000z0000000
545 *
546 * Then right shifting by 7 yields
547 *
548 * 0by0000000z
549 *
550 * Viewed as a number, this is
551 *
552 * 2**8 * y + z
553 *
554 * We then multiply by 0x0101 (which is = 2**8 + 1), so
555 *
556 * (2**8 * y + z) * (2**8 + 1)
557 * = (2**8 * y * 2**8) + (z * 2**8) + (2**8 * y * 1) + (z * 1)
558 * = (2**16 * y) + (2**8 * (y + z)) + z
559 *
560 * However (2**16 * y) doesn't fit in a 16-bit word (unless 'y' is
561 * zero in which case it is 0), and since this is unsigned
562 * multiplication, the C standard says that this component just
563 * gets ignored, so we are left with
564 *
565 * = 2**8 * (y + z) + z
566 *
567 * We then shift right by 8 bits, which divides by 2**8, and gets
568 * rid of the lone 'z', leaving us with
569 *
570 * = y + z
571 *
572 * The same principles apply for longer word sizes. For 32 bit
573 * words we end up with
574 *
575 * = 2**24 * (w + x + y + z) + (lots of other expressions
576 * below 2**24)
577 *
578 * with anything above 2**24 having overflowed and been chopped
579 * off. Shifting right by 24 yields (w + x + y + z)
580 */
581
582 count += ((((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) >> 7)
583 * PERL_COUNT_MULTIPLIER)
584 >> ((PERL_WORDSIZE - 1) * CHARBITS);
585 x += PERL_WORDSIZE;
586 } while (x + PERL_WORDSIZE <= e);
587 }
588
589# endif
590
591 /* Process per-byte */
592 while (x < e) {
593 if (! UTF8_IS_INVARIANT(*x)) {
594 count++;
595 }
596
597 x++;
598 }
599
600 return count;
601}
602
603#endif
604
605#undef PERL_WORDSIZE
606#undef PERL_COUNT_MULTIPLIER
607#undef PERL_WORD_BOUNDARY_MASK
608#undef PERL_VARIANTS_WORD_MASK
609
7c93d8f0 610/*
5ff889fb
KW
611=for apidoc is_utf8_string
612
82c5d941
KW
613Returns TRUE if the first C<len> bytes of string C<s> form a valid
614Perl-extended-UTF-8 string; returns FALSE otherwise. If C<len> is 0, it will
615be calculated using C<strlen(s)> (which means if you use this option, that C<s>
616can't have embedded C<NUL> characters and has to have a terminating C<NUL>
617byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
618
2717076a
KW
619This function considers Perl's extended UTF-8 to be valid. That means that
620code points above Unicode, surrogates, and non-character code points are
9f2abfde
KW
621considered valid by this function. Use C<L</is_strict_utf8_string>>,
622C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
623code points are considered valid.
5ff889fb 624
9f2abfde
KW
625See also
626C<L</is_utf8_invariant_string>>,
0cbf5865 627C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
628C<L</is_utf8_string_loc>>,
629C<L</is_utf8_string_loclen>>,
8bc127bf
KW
630C<L</is_utf8_fixed_width_buf_flags>>,
631C<L</is_utf8_fixed_width_buf_loc_flags>>,
632C<L</is_utf8_fixed_width_buf_loclen_flags>>,
5ff889fb
KW
633
634=cut
635*/
636
dd237e82 637#define is_utf8_string(s, len) is_utf8_string_loclen(s, len, NULL, NULL)
5ff889fb 638
c9cd936b
KW
639#if defined(PERL_CORE) || defined (PERL_EXT)
640
641/*
642=for apidoc is_utf8_non_invariant_string
643
644Returns TRUE if L<perlapi/is_utf8_invariant_string> returns FALSE for the first
645C<len> bytes of the string C<s>, but they are, nonetheless, legal Perl-extended
646UTF-8; otherwise returns FALSE.
647
648A TRUE return means that at least one code point represented by the sequence
649either is a wide character not representable as a single byte, or the
650representation differs depending on whether the sequence is encoded in UTF-8 or
651not.
652
653See also
654C<L<perlapi/is_utf8_invariant_string>>,
655C<L<perlapi/is_utf8_string>>
656
657=cut
658
659This is commonly used to determine if a SV's UTF-8 flag should be turned on.
660It needn't be if its string is entirely UTF-8 invariant, and it shouldn't be if
661it otherwise contains invalid UTF-8.
662
663It is an internal function because khw thinks that XS code shouldn't be working
664at this low a level. A valid use case could change that.
665
666*/
667
668PERL_STATIC_INLINE bool
669S_is_utf8_non_invariant_string(const U8* const s, STRLEN len)
670{
671 const U8 * first_variant;
672
673 PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING;
674
675 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
676 return FALSE;
677 }
678
679 return is_utf8_string(first_variant, len - (first_variant - s));
680}
681
682#endif
683
5ff889fb 684/*
9f2abfde
KW
685=for apidoc is_strict_utf8_string
686
687Returns TRUE if the first C<len> bytes of string C<s> form a valid
688UTF-8-encoded string that is fully interchangeable by any application using
689Unicode rules; otherwise it returns FALSE. If C<len> is 0, it will be
690calculated using C<strlen(s)> (which means if you use this option, that C<s>
691can't have embedded C<NUL> characters and has to have a terminating C<NUL>
692byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
693
694This function returns FALSE for strings containing any
695code points above the Unicode max of 0x10FFFF, surrogate code points, or
696non-character code points.
697
698See also
699C<L</is_utf8_invariant_string>>,
0cbf5865 700C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
701C<L</is_utf8_string>>,
702C<L</is_utf8_string_flags>>,
703C<L</is_utf8_string_loc>>,
704C<L</is_utf8_string_loc_flags>>,
705C<L</is_utf8_string_loclen>>,
706C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
707C<L</is_utf8_fixed_width_buf_flags>>,
708C<L</is_utf8_fixed_width_buf_loc_flags>>,
709C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
710C<L</is_strict_utf8_string_loc>>,
711C<L</is_strict_utf8_string_loclen>>,
712C<L</is_c9strict_utf8_string>>,
713C<L</is_c9strict_utf8_string_loc>>,
714and
715C<L</is_c9strict_utf8_string_loclen>>.
716
717=cut
718*/
719
dd237e82 720#define is_strict_utf8_string(s, len) is_strict_utf8_string_loclen(s, len, NULL, NULL)
9f2abfde
KW
721
722/*
723=for apidoc is_c9strict_utf8_string
724
725Returns TRUE if the first C<len> bytes of string C<s> form a valid
726UTF-8-encoded string that conforms to
727L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
728otherwise it returns FALSE. If C<len> is 0, it will be calculated using
729C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
730C<NUL> characters and has to have a terminating C<NUL> byte). Note that all
731characters being ASCII constitute 'a valid UTF-8 string'.
732
733This function returns FALSE for strings containing any code points above the
734Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
735code points per
736L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
737
738See also
739C<L</is_utf8_invariant_string>>,
0cbf5865 740C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
741C<L</is_utf8_string>>,
742C<L</is_utf8_string_flags>>,
743C<L</is_utf8_string_loc>>,
744C<L</is_utf8_string_loc_flags>>,
745C<L</is_utf8_string_loclen>>,
746C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
747C<L</is_utf8_fixed_width_buf_flags>>,
748C<L</is_utf8_fixed_width_buf_loc_flags>>,
749C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
750C<L</is_strict_utf8_string>>,
751C<L</is_strict_utf8_string_loc>>,
752C<L</is_strict_utf8_string_loclen>>,
753C<L</is_c9strict_utf8_string_loc>>,
754and
755C<L</is_c9strict_utf8_string_loclen>>.
756
757=cut
758*/
759
dd237e82 760#define is_c9strict_utf8_string(s, len) is_c9strict_utf8_string_loclen(s, len, NULL, 0)
9f2abfde
KW
761
762/*
763=for apidoc is_utf8_string_flags
764
765Returns TRUE if the first C<len> bytes of string C<s> form a valid
766UTF-8 string, subject to the restrictions imposed by C<flags>;
767returns FALSE otherwise. If C<len> is 0, it will be calculated
768using C<strlen(s)> (which means if you use this option, that C<s> can't have
769embedded C<NUL> characters and has to have a terminating C<NUL> byte). Note
770that all characters being ASCII constitute 'a valid UTF-8 string'.
771
772If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
773C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
774as C<L</is_strict_utf8_string>>; and if C<flags> is
775C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
776C<L</is_c9strict_utf8_string>>. Otherwise C<flags> may be any
777combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
778C<L</utf8n_to_uvchr>>, with the same meanings.
779
780See also
781C<L</is_utf8_invariant_string>>,
0cbf5865 782C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
783C<L</is_utf8_string>>,
784C<L</is_utf8_string_loc>>,
785C<L</is_utf8_string_loc_flags>>,
786C<L</is_utf8_string_loclen>>,
787C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
788C<L</is_utf8_fixed_width_buf_flags>>,
789C<L</is_utf8_fixed_width_buf_loc_flags>>,
790C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
791C<L</is_strict_utf8_string>>,
792C<L</is_strict_utf8_string_loc>>,
793C<L</is_strict_utf8_string_loclen>>,
794C<L</is_c9strict_utf8_string>>,
795C<L</is_c9strict_utf8_string_loc>>,
796and
797C<L</is_c9strict_utf8_string_loclen>>.
798
799=cut
800*/
801
802PERL_STATIC_INLINE bool
f60f61fd 803S_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags)
9f2abfde 804{
33756530 805 const U8 * first_variant;
9f2abfde
KW
806
807 PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
808 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 809 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 810
f60f61fd
KW
811 if (len == 0) {
812 len = strlen((const char *)s);
813 }
814
9f2abfde
KW
815 if (flags == 0) {
816 return is_utf8_string(s, len);
817 }
818
d044b7a7 819 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
820 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
821 {
822 return is_strict_utf8_string(s, len);
823 }
824
d044b7a7 825 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
826 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
827 {
828 return is_c9strict_utf8_string(s, len);
829 }
830
33756530
KW
831 if (! is_utf8_invariant_string_loc(s, len, &first_variant)) {
832 const U8* const send = s + len;
833 const U8* x = first_variant;
834
a0d7f935
KW
835 while (x < send) {
836 STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
837 if (UNLIKELY(! cur_len)) {
838 return FALSE;
839 }
840 x += cur_len;
9f2abfde 841 }
33756530 842 }
9f2abfde
KW
843
844 return TRUE;
845}
846
847/*
5ff889fb
KW
848
849=for apidoc is_utf8_string_loc
850
2717076a 851Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 852case of "utf8ness failure") or the location C<s>+C<len> (in the case of
82c5d941 853"utf8ness success") in the C<ep> pointer.
5ff889fb 854
2717076a 855See also C<L</is_utf8_string_loclen>>.
5ff889fb 856
3964c812
KW
857=cut
858*/
859
860#define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0)
861
862/*
863
5ff889fb
KW
864=for apidoc is_utf8_string_loclen
865
2717076a 866Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 867case of "utf8ness failure") or the location C<s>+C<len> (in the case of
9f2abfde 868"utf8ness success") in the C<ep> pointer, and the number of UTF-8
82c5d941 869encoded characters in the C<el> pointer.
5ff889fb 870
2717076a 871See also C<L</is_utf8_string_loc>>.
5ff889fb
KW
872
873=cut
874*/
875
56e4cf64 876PERL_STATIC_INLINE bool
33756530 877Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
5ff889fb 878{
33756530 879 const U8 * first_variant;
5ff889fb
KW
880
881 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
882
33756530
KW
883 if (len == 0) {
884 len = strlen((const char *) s);
885 }
886
887 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
888 if (el)
889 *el = len;
890
891 if (ep) {
892 *ep = s + len;
893 }
894
895 return TRUE;
896 }
897
898 {
899 const U8* const send = s + len;
900 const U8* x = first_variant;
901 STRLEN outlen = first_variant - s;
902
a0d7f935
KW
903 while (x < send) {
904 const STRLEN cur_len = isUTF8_CHAR(x, send);
905 if (UNLIKELY(! cur_len)) {
906 break;
907 }
908 x += cur_len;
909 outlen++;
5ff889fb 910 }
5ff889fb 911
a0d7f935
KW
912 if (el)
913 *el = outlen;
5ff889fb 914
a0d7f935
KW
915 if (ep) {
916 *ep = x;
917 }
5ff889fb 918
a0d7f935 919 return (x == send);
33756530 920 }
5ff889fb
KW
921}
922
923/*
9f2abfde
KW
924
925=for apidoc is_strict_utf8_string_loc
926
927Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
928case of "utf8ness failure") or the location C<s>+C<len> (in the case of
929"utf8ness success") in the C<ep> pointer.
930
931See also C<L</is_strict_utf8_string_loclen>>.
932
933=cut
934*/
935
936#define is_strict_utf8_string_loc(s, len, ep) \
937 is_strict_utf8_string_loclen(s, len, ep, 0)
938
939/*
940
941=for apidoc is_strict_utf8_string_loclen
942
943Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
944case of "utf8ness failure") or the location C<s>+C<len> (in the case of
945"utf8ness success") in the C<ep> pointer, and the number of UTF-8
946encoded characters in the C<el> pointer.
947
948See also C<L</is_strict_utf8_string_loc>>.
949
950=cut
951*/
952
953PERL_STATIC_INLINE bool
33756530 954S_is_strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
9f2abfde 955{
33756530 956 const U8 * first_variant;
9f2abfde
KW
957
958 PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
959
33756530
KW
960 if (len == 0) {
961 len = strlen((const char *) s);
962 }
963
964 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
965 if (el)
966 *el = len;
967
968 if (ep) {
969 *ep = s + len;
970 }
971
972 return TRUE;
973 }
974
975 {
976 const U8* const send = s + len;
977 const U8* x = first_variant;
978 STRLEN outlen = first_variant - s;
979
a0d7f935
KW
980 while (x < send) {
981 const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
982 if (UNLIKELY(! cur_len)) {
983 break;
984 }
985 x += cur_len;
986 outlen++;
9f2abfde 987 }
9f2abfde 988
a0d7f935
KW
989 if (el)
990 *el = outlen;
9f2abfde 991
a0d7f935
KW
992 if (ep) {
993 *ep = x;
994 }
9f2abfde 995
a0d7f935 996 return (x == send);
33756530 997 }
9f2abfde
KW
998}
999
1000/*
1001
1002=for apidoc is_c9strict_utf8_string_loc
1003
1004Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
1005the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1006"utf8ness success") in the C<ep> pointer.
1007
1008See also C<L</is_c9strict_utf8_string_loclen>>.
1009
1010=cut
1011*/
1012
1013#define is_c9strict_utf8_string_loc(s, len, ep) \
1014 is_c9strict_utf8_string_loclen(s, len, ep, 0)
1015
1016/*
1017
1018=for apidoc is_c9strict_utf8_string_loclen
1019
1020Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
1021the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1022"utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
1023characters in the C<el> pointer.
1024
1025See also C<L</is_c9strict_utf8_string_loc>>.
1026
1027=cut
1028*/
1029
1030PERL_STATIC_INLINE bool
33756530 1031S_is_c9strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
9f2abfde 1032{
33756530 1033 const U8 * first_variant;
9f2abfde
KW
1034
1035 PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
1036
33756530
KW
1037 if (len == 0) {
1038 len = strlen((const char *) s);
1039 }
1040
1041 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1042 if (el)
1043 *el = len;
1044
1045 if (ep) {
1046 *ep = s + len;
1047 }
1048
1049 return TRUE;
1050 }
1051
1052 {
1053 const U8* const send = s + len;
1054 const U8* x = first_variant;
1055 STRLEN outlen = first_variant - s;
1056
a0d7f935
KW
1057 while (x < send) {
1058 const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
1059 if (UNLIKELY(! cur_len)) {
1060 break;
1061 }
1062 x += cur_len;
1063 outlen++;
9f2abfde 1064 }
9f2abfde 1065
a0d7f935
KW
1066 if (el)
1067 *el = outlen;
9f2abfde 1068
a0d7f935
KW
1069 if (ep) {
1070 *ep = x;
1071 }
9f2abfde 1072
a0d7f935 1073 return (x == send);
33756530 1074 }
9f2abfde
KW
1075}
1076
1077/*
1078
1079=for apidoc is_utf8_string_loc_flags
1080
1081Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1082case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1083"utf8ness success") in the C<ep> pointer.
1084
1085See also C<L</is_utf8_string_loclen_flags>>.
1086
1087=cut
1088*/
1089
1090#define is_utf8_string_loc_flags(s, len, ep, flags) \
1091 is_utf8_string_loclen_flags(s, len, ep, 0, flags)
1092
1093
1094/* The above 3 actual functions could have been moved into the more general one
1095 * just below, and made #defines that call it with the right 'flags'. They are
1096 * currently kept separate to increase their chances of getting inlined */
1097
1098/*
1099
1100=for apidoc is_utf8_string_loclen_flags
1101
1102Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1103case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1104"utf8ness success") in the C<ep> pointer, and the number of UTF-8
1105encoded characters in the C<el> pointer.
1106
1107See also C<L</is_utf8_string_loc_flags>>.
1108
1109=cut
1110*/
1111
1112PERL_STATIC_INLINE bool
f60f61fd 1113S_is_utf8_string_loclen_flags(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
9f2abfde 1114{
33756530 1115 const U8 * first_variant;
9f2abfde
KW
1116
1117 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
1118 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 1119 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 1120
f60f61fd 1121 if (len == 0) {
a0d7f935 1122 len = strlen((const char *) s);
f60f61fd
KW
1123 }
1124
9f2abfde
KW
1125 if (flags == 0) {
1126 return is_utf8_string_loclen(s, len, ep, el);
1127 }
1128
d044b7a7 1129 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
1130 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
1131 {
1132 return is_strict_utf8_string_loclen(s, len, ep, el);
1133 }
1134
d044b7a7 1135 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
1136 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
1137 {
1138 return is_c9strict_utf8_string_loclen(s, len, ep, el);
1139 }
1140
33756530
KW
1141 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1142 if (el)
1143 *el = len;
1144
1145 if (ep) {
1146 *ep = s + len;
1147 }
1148
1149 return TRUE;
1150 }
1151
1152 {
1153 const U8* send = s + len;
1154 const U8* x = first_variant;
1155 STRLEN outlen = first_variant - s;
1156
a0d7f935
KW
1157 while (x < send) {
1158 const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
1159 if (UNLIKELY(! cur_len)) {
1160 break;
1161 }
1162 x += cur_len;
1163 outlen++;
9f2abfde 1164 }
9f2abfde 1165
a0d7f935
KW
1166 if (el)
1167 *el = outlen;
9f2abfde 1168
a0d7f935
KW
1169 if (ep) {
1170 *ep = x;
1171 }
9f2abfde 1172
a0d7f935 1173 return (x == send);
33756530 1174 }
9f2abfde
KW
1175}
1176
1177/*
7c93d8f0
KW
1178=for apidoc utf8_distance
1179
1180Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
1181and C<b>.
1182
1183WARNING: use only if you *know* that the pointers point inside the
1184same UTF-8 buffer.
1185
1186=cut
1187*/
1188
1189PERL_STATIC_INLINE IV
1190Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
1191{
1192 PERL_ARGS_ASSERT_UTF8_DISTANCE;
1193
1194 return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
1195}
1196
1197/*
1198=for apidoc utf8_hop
1199
1200Return the UTF-8 pointer C<s> displaced by C<off> characters, either
1201forward or backward.
1202
1203WARNING: do not use the following unless you *know* C<off> is within
1204the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
1205on the first byte of character or just after the last byte of a character.
1206
1207=cut
1208*/
1209
1210PERL_STATIC_INLINE U8 *
1211Perl_utf8_hop(const U8 *s, SSize_t off)
1212{
1213 PERL_ARGS_ASSERT_UTF8_HOP;
1214
1215 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1216 * the bitops (especially ~) can create illegal UTF-8.
1217 * In other words: in Perl UTF-8 is not just for Unicode. */
1218
1219 if (off >= 0) {
1220 while (off--)
1221 s += UTF8SKIP(s);
1222 }
1223 else {
1224 while (off++) {
1225 s--;
1226 while (UTF8_IS_CONTINUATION(*s))
1227 s--;
1228 }
1229 }
de979548 1230 GCC_DIAG_IGNORE(-Wcast-qual);
7c93d8f0 1231 return (U8 *)s;
de979548 1232 GCC_DIAG_RESTORE;
7c93d8f0
KW
1233}
1234
4dab108f 1235/*
65df57a8
TC
1236=for apidoc utf8_hop_forward
1237
1238Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1239forward.
1240
1241C<off> must be non-negative.
1242
1243C<s> must be before or equal to C<end>.
1244
1245When moving forward it will not move beyond C<end>.
1246
1247Will not exceed this limit even if the string is not valid "UTF-8".
1248
1249=cut
1250*/
1251
1252PERL_STATIC_INLINE U8 *
1253Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
1254{
1255 PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
1256
1257 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1258 * the bitops (especially ~) can create illegal UTF-8.
1259 * In other words: in Perl UTF-8 is not just for Unicode. */
1260
1261 assert(s <= end);
1262 assert(off >= 0);
1263
1264 while (off--) {
1265 STRLEN skip = UTF8SKIP(s);
de979548
P
1266 if ((STRLEN)(end - s) <= skip) {
1267 GCC_DIAG_IGNORE(-Wcast-qual);
65df57a8 1268 return (U8 *)end;
de979548
P
1269 GCC_DIAG_RESTORE;
1270 }
65df57a8
TC
1271 s += skip;
1272 }
1273
de979548 1274 GCC_DIAG_IGNORE(-Wcast-qual);
65df57a8 1275 return (U8 *)s;
de979548 1276 GCC_DIAG_RESTORE;
65df57a8
TC
1277}
1278
1279/*
1280=for apidoc utf8_hop_back
1281
1282Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1283backward.
1284
1285C<off> must be non-positive.
1286
1287C<s> must be after or equal to C<start>.
1288
1289When moving backward it will not move before C<start>.
1290
1291Will not exceed this limit even if the string is not valid "UTF-8".
1292
1293=cut
1294*/
1295
1296PERL_STATIC_INLINE U8 *
1297Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
1298{
1299 PERL_ARGS_ASSERT_UTF8_HOP_BACK;
1300
1301 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1302 * the bitops (especially ~) can create illegal UTF-8.
1303 * In other words: in Perl UTF-8 is not just for Unicode. */
1304
1305 assert(start <= s);
1306 assert(off <= 0);
1307
1308 while (off++ && s > start) {
1309 s--;
1310 while (UTF8_IS_CONTINUATION(*s) && s > start)
1311 s--;
1312 }
1313
de979548 1314 GCC_DIAG_IGNORE(-Wcast-qual);
65df57a8 1315 return (U8 *)s;
de979548 1316 GCC_DIAG_RESTORE;
65df57a8
TC
1317}
1318
1319/*
1320=for apidoc utf8_hop_safe
1321
1322Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1323either forward or backward.
1324
1325When moving backward it will not move before C<start>.
1326
1327When moving forward it will not move beyond C<end>.
1328
1329Will not exceed those limits even if the string is not valid "UTF-8".
1330
1331=cut
1332*/
1333
1334PERL_STATIC_INLINE U8 *
1335Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
1336{
1337 PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
1338
1339 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1340 * the bitops (especially ~) can create illegal UTF-8.
1341 * In other words: in Perl UTF-8 is not just for Unicode. */
1342
1343 assert(start <= s && s <= end);
1344
1345 if (off >= 0) {
1346 return utf8_hop_forward(s, off, end);
1347 }
1348 else {
1349 return utf8_hop_back(s, off, start);
1350 }
1351}
1352
1353/*
4dab108f
KW
1354
1355=for apidoc is_utf8_valid_partial_char
1356
6cbb9248
KW
1357Returns 0 if the sequence of bytes starting at C<s> and looking no further than
1358S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
1359points. Otherwise, it returns 1 if there exists at least one non-empty
1360sequence of bytes that when appended to sequence C<s>, starting at position
1361C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
1362otherwise returns 0.
1363
1364In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
1365point.
1366
1367This is useful when a fixed-length buffer is being tested for being well-formed
1368UTF-8, but the final few bytes in it don't comprise a full character; that is,
1369it is split somewhere in the middle of the final code point's UTF-8
1370representation. (Presumably when the buffer is refreshed with the next chunk
1371of data, the new first bytes will complete the partial code point.) This
1372function is used to verify that the final bytes in the current buffer are in
1373fact the legal beginning of some code point, so that if they aren't, the
1374failure can be signalled without having to wait for the next read.
4dab108f
KW
1375
1376=cut
1377*/
2717076a
KW
1378#define is_utf8_valid_partial_char(s, e) \
1379 is_utf8_valid_partial_char_flags(s, e, 0)
f1c999a7
KW
1380
1381/*
1382
1383=for apidoc is_utf8_valid_partial_char_flags
1384
1385Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
1386or not the input is a valid UTF-8 encoded partial character, but it takes an
1387extra parameter, C<flags>, which can further restrict which code points are
1388considered valid.
1389
1390If C<flags> is 0, this behaves identically to
1391C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
1392of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
1393there is any sequence of bytes that can complete the input partial character in
1394such a way that a non-prohibited character is formed, the function returns
2717076a
KW
1395TRUE; otherwise FALSE. Non character code points cannot be determined based on
1396partial character input. But many of the other possible excluded types can be
f1c999a7
KW
1397determined from just the first one or two bytes.
1398
1399=cut
1400 */
1401
56e4cf64 1402PERL_STATIC_INLINE bool
f1c999a7 1403S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
4dab108f 1404{
f1c999a7 1405 PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
4dab108f 1406
f1c999a7 1407 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 1408 |UTF8_DISALLOW_PERL_EXTENDED)));
4dab108f 1409
8875bd48 1410 if (s >= e || s + UTF8SKIP(s) <= e) {
4dab108f
KW
1411 return FALSE;
1412 }
1413
f1c999a7 1414 return cBOOL(_is_utf8_char_helper(s, e, flags));
4dab108f
KW
1415}
1416
8bc127bf
KW
1417/*
1418
1419=for apidoc is_utf8_fixed_width_buf_flags
1420
1421Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
1422is entirely valid UTF-8, subject to the restrictions given by C<flags>;
1423otherwise it returns FALSE.
1424
1425If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
1426without restriction. If the final few bytes of the buffer do not form a
1427complete code point, this will return TRUE anyway, provided that
1428C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
1429
1430If C<flags> in non-zero, it can be any combination of the
1431C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
1432same meanings.
1433
1434This function differs from C<L</is_utf8_string_flags>> only in that the latter
1435returns FALSE if the final few bytes of the string don't form a complete code
1436point.
1437
1438=cut
1439 */
1440#define is_utf8_fixed_width_buf_flags(s, len, flags) \
1441 is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
1442
1443/*
1444
1445=for apidoc is_utf8_fixed_width_buf_loc_flags
1446
1447Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
1448failure in the C<ep> pointer. If the function returns TRUE, C<*ep> will point
1449to the beginning of any partial character at the end of the buffer; if there is
1450no partial character C<*ep> will contain C<s>+C<len>.
1451
1452See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
1453
1454=cut
1455*/
1456
1457#define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \
1458 is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
1459
1460/*
1461
1462=for apidoc is_utf8_fixed_width_buf_loclen_flags
1463
1464Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
1465complete, valid characters found in the C<el> pointer.
1466
1467=cut
1468*/
1469
1470PERL_STATIC_INLINE bool
1471S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
33756530 1472 STRLEN len,
8bc127bf
KW
1473 const U8 **ep,
1474 STRLEN *el,
1475 const U32 flags)
1476{
1477 const U8 * maybe_partial;
1478
1479 PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
1480
1481 if (! ep) {
1482 ep = &maybe_partial;
1483 }
1484
1485 /* If it's entirely valid, return that; otherwise see if the only error is
1486 * that the final few bytes are for a partial character */
1487 return is_utf8_string_loclen_flags(s, len, ep, el, flags)
1488 || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
1489}
1490
c8028aa6
TC
1491/* ------------------------------- perl.h ----------------------------- */
1492
1493/*
dcccc8ff
KW
1494=head1 Miscellaneous Functions
1495
41188aa0 1496=for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
c8028aa6 1497
6602b933 1498Test that the given C<pv> doesn't contain any internal C<NUL> characters.
796b6530 1499If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
c8028aa6
TC
1500
1501Return TRUE if the name is safe.
1502
796b6530 1503Used by the C<IS_SAFE_SYSCALL()> macro.
c8028aa6
TC
1504
1505=cut
1506*/
1507
1508PERL_STATIC_INLINE bool
41188aa0 1509S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
c8028aa6
TC
1510 /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
1511 * perl itself uses xce*() functions which accept 8-bit strings.
1512 */
1513
1514 PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
1515
6c4650b3 1516 if (len > 1) {
c8028aa6 1517 char *null_at;
41188aa0 1518 if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
c8028aa6 1519 SETERRNO(ENOENT, LIB_INVARG);
1d505182 1520 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
c8028aa6 1521 "Invalid \\0 character in %s for %s: %s\\0%s",
41188aa0 1522 what, op_name, pv, null_at+1);
c8028aa6
TC
1523 return FALSE;
1524 }
1525 }
1526
1527 return TRUE;
1528}
1529
1530/*
7cb3f959
TC
1531
1532Return true if the supplied filename has a newline character
fa6c7d00 1533immediately before the first (hopefully only) NUL.
7cb3f959
TC
1534
1535My original look at this incorrectly used the len from SvPV(), but
1536that's incorrect, since we allow for a NUL in pv[len-1].
1537
1538So instead, strlen() and work from there.
1539
1540This allow for the user reading a filename, forgetting to chomp it,
1541then calling:
1542
1543 open my $foo, "$file\0";
1544
1545*/
1546
1547#ifdef PERL_CORE
1548
1549PERL_STATIC_INLINE bool
1550S_should_warn_nl(const char *pv) {
1551 STRLEN len;
1552
1553 PERL_ARGS_ASSERT_SHOULD_WARN_NL;
1554
1555 len = strlen(pv);
1556
1557 return len > 0 && pv[len-1] == '\n';
1558}
1559
1560#endif
1561
81d52ecd
JH
1562/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
1563
1564#define MAX_CHARSET_NAME_LENGTH 2
1565
1566PERL_STATIC_INLINE const char *
1567get_regex_charset_name(const U32 flags, STRLEN* const lenp)
1568{
1569 /* Returns a string that corresponds to the name of the regex character set
1570 * given by 'flags', and *lenp is set the length of that string, which
1571 * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
1572
1573 *lenp = 1;
1574 switch (get_regex_charset(flags)) {
1575 case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
1576 case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
1577 case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
1578 case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
1579 case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
1580 *lenp = 2;
1581 return ASCII_MORE_RESTRICT_PAT_MODS;
1582 }
1583 /* The NOT_REACHED; hides an assert() which has a rather complex
1584 * definition in perl.h. */
1585 NOT_REACHED; /* NOTREACHED */
1586 return "?"; /* Unknown */
1587}
1588
7cb3f959 1589/*
ed382232
TC
1590
1591Return false if any get magic is on the SV other than taint magic.
1592
1593*/
1594
1595PERL_STATIC_INLINE bool
1596S_sv_only_taint_gmagic(SV *sv) {
1597 MAGIC *mg = SvMAGIC(sv);
1598
1599 PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
1600
1601 while (mg) {
1602 if (mg->mg_type != PERL_MAGIC_taint
1603 && !(mg->mg_flags & MGf_GSKIP)
1604 && mg->mg_virtual->svt_get) {
1605 return FALSE;
1606 }
1607 mg = mg->mg_moremagic;
1608 }
1609
1610 return TRUE;
1611}
1612
ed8ff0f3
DM
1613/* ------------------ cop.h ------------------------------------------- */
1614
1615
1616/* Enter a block. Push a new base context and return its address. */
1617
1618PERL_STATIC_INLINE PERL_CONTEXT *
1619S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
1620{
1621 PERL_CONTEXT * cx;
1622
1623 PERL_ARGS_ASSERT_CX_PUSHBLOCK;
1624
1625 CXINC;
1626 cx = CX_CUR();
1627 cx->cx_type = type;
1628 cx->blk_gimme = gimme;
1629 cx->blk_oldsaveix = saveix;
4caf7d8c 1630 cx->blk_oldsp = (I32)(sp - PL_stack_base);
ed8ff0f3 1631 cx->blk_oldcop = PL_curcop;
4caf7d8c 1632 cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
ed8ff0f3
DM
1633 cx->blk_oldscopesp = PL_scopestack_ix;
1634 cx->blk_oldpm = PL_curpm;
ce8bb8d8 1635 cx->blk_old_tmpsfloor = PL_tmps_floor;
ed8ff0f3
DM
1636
1637 PL_tmps_floor = PL_tmps_ix;
1638 CX_DEBUG(cx, "PUSH");
1639 return cx;
1640}
1641
1642
1643/* Exit a block (RETURN and LAST). */
1644
1645PERL_STATIC_INLINE void
1646S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
1647{
1648 PERL_ARGS_ASSERT_CX_POPBLOCK;
1649
1650 CX_DEBUG(cx, "POP");
1651 /* these 3 are common to cx_popblock and cx_topblock */
1652 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1653 PL_scopestack_ix = cx->blk_oldscopesp;
1654 PL_curpm = cx->blk_oldpm;
1655
1656 /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
1657 * and leaves a CX entry lying around for repeated use, so
1658 * skip for multicall */ \
1659 assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
1660 || PL_savestack_ix == cx->blk_oldsaveix);
1661 PL_curcop = cx->blk_oldcop;
ce8bb8d8 1662 PL_tmps_floor = cx->blk_old_tmpsfloor;
ed8ff0f3
DM
1663}
1664
1665/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
1666 * Whereas cx_popblock() restores the state to the point just before
1667 * cx_pushblock() was called, cx_topblock() restores it to the point just
1668 * *after* cx_pushblock() was called. */
1669
1670PERL_STATIC_INLINE void
1671S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
1672{
1673 PERL_ARGS_ASSERT_CX_TOPBLOCK;
1674
1675 CX_DEBUG(cx, "TOP");
1676 /* these 3 are common to cx_popblock and cx_topblock */
1677 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1678 PL_scopestack_ix = cx->blk_oldscopesp;
1679 PL_curpm = cx->blk_oldpm;
1680
1681 PL_stack_sp = PL_stack_base + cx->blk_oldsp;
1682}
1683
1684
a73d8813
DM
1685PERL_STATIC_INLINE void
1686S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
1687{
1688 U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
1689
1690 PERL_ARGS_ASSERT_CX_PUSHSUB;
1691
3f6bd23a 1692 PERL_DTRACE_PROBE_ENTRY(cv);
a73d8813
DM
1693 cx->blk_sub.cv = cv;
1694 cx->blk_sub.olddepth = CvDEPTH(cv);
1695 cx->blk_sub.prevcomppad = PL_comppad;
1696 cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
1697 cx->blk_sub.retop = retop;
1698 SvREFCNT_inc_simple_void_NN(cv);
1699 cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
1700}
1701
1702
1703/* subsets of cx_popsub() */
1704
1705PERL_STATIC_INLINE void
1706S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
1707{
1708 CV *cv;
1709
1710 PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
1711 assert(CxTYPE(cx) == CXt_SUB);
1712
1713 PL_comppad = cx->blk_sub.prevcomppad;
1714 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1715 cv = cx->blk_sub.cv;
1716 CvDEPTH(cv) = cx->blk_sub.olddepth;
1717 cx->blk_sub.cv = NULL;
1718 SvREFCNT_dec(cv);
1719}
1720
1721
1722/* handle the @_ part of leaving a sub */
1723
1724PERL_STATIC_INLINE void
1725S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
1726{
1727 AV *av;
1728
1729 PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
1730 assert(CxTYPE(cx) == CXt_SUB);
1731 assert(AvARRAY(MUTABLE_AV(
1732 PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
1733 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
1734
1735 CX_POP_SAVEARRAY(cx);
1736 av = MUTABLE_AV(PAD_SVl(0));
1737 if (UNLIKELY(AvREAL(av)))
1738 /* abandon @_ if it got reified */
1739 clear_defarray(av, 0);
1740 else {
1741 CLEAR_ARGARRAY(av);
1742 }
1743}
1744
1745
1746PERL_STATIC_INLINE void
1747S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
1748{
1749 PERL_ARGS_ASSERT_CX_POPSUB;
1750 assert(CxTYPE(cx) == CXt_SUB);
1751
3f6bd23a 1752 PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
a73d8813
DM
1753
1754 if (CxHASARGS(cx))
1755 cx_popsub_args(cx);
1756 cx_popsub_common(cx);
1757}
1758
1759
6a7d52cc
DM
1760PERL_STATIC_INLINE void
1761S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
1762{
1763 PERL_ARGS_ASSERT_CX_PUSHFORMAT;
1764
1765 cx->blk_format.cv = cv;
1766 cx->blk_format.retop = retop;
1767 cx->blk_format.gv = gv;
1768 cx->blk_format.dfoutgv = PL_defoutgv;
1769 cx->blk_format.prevcomppad = PL_comppad;
1770 cx->blk_u16 = 0;
1771
1772 SvREFCNT_inc_simple_void_NN(cv);
1773 CvDEPTH(cv)++;
1774 SvREFCNT_inc_void(cx->blk_format.dfoutgv);
1775}
1776
1777
1778PERL_STATIC_INLINE void
1779S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
1780{
1781 CV *cv;
1782 GV *dfout;
1783
1784 PERL_ARGS_ASSERT_CX_POPFORMAT;
1785 assert(CxTYPE(cx) == CXt_FORMAT);
1786
1787 dfout = cx->blk_format.dfoutgv;
1788 setdefout(dfout);
1789 cx->blk_format.dfoutgv = NULL;
1790 SvREFCNT_dec_NN(dfout);
1791
1792 PL_comppad = cx->blk_format.prevcomppad;
1793 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1794 cv = cx->blk_format.cv;
1795 cx->blk_format.cv = NULL;
1796 --CvDEPTH(cv);
1797 SvREFCNT_dec_NN(cv);
1798}
1799
1800
13febba5
DM
1801PERL_STATIC_INLINE void
1802S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
1803{
1804 PERL_ARGS_ASSERT_CX_PUSHEVAL;
1805
1806 cx->blk_eval.retop = retop;
1807 cx->blk_eval.old_namesv = namesv;
1808 cx->blk_eval.old_eval_root = PL_eval_root;
1809 cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
1810 cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
1811 cx->blk_eval.cur_top_env = PL_top_env;
1812
4c57ced5 1813 assert(!(PL_in_eval & ~ 0x3F));
13febba5 1814 assert(!(PL_op->op_type & ~0x1FF));
4c57ced5 1815 cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
13febba5
DM
1816}
1817
1818
1819PERL_STATIC_INLINE void
1820S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
1821{
1822 SV *sv;
1823
1824 PERL_ARGS_ASSERT_CX_POPEVAL;
1825 assert(CxTYPE(cx) == CXt_EVAL);
1826
1827 PL_in_eval = CxOLD_IN_EVAL(cx);
4c57ced5 1828 assert(!(PL_in_eval & 0xc0));
13febba5
DM
1829 PL_eval_root = cx->blk_eval.old_eval_root;
1830 sv = cx->blk_eval.cur_text;
4c57ced5 1831 if (sv && CxEVAL_TXT_REFCNTED(cx)) {
13febba5
DM
1832 cx->blk_eval.cur_text = NULL;
1833 SvREFCNT_dec_NN(sv);
1834 }
1835
1836 sv = cx->blk_eval.old_namesv;
2a1e0dfe
DM
1837 if (sv) {
1838 cx->blk_eval.old_namesv = NULL;
1839 SvREFCNT_dec_NN(sv);
1840 }
13febba5 1841}
6a7d52cc 1842
a73d8813 1843
d1b6bf72
DM
1844/* push a plain loop, i.e.
1845 * { block }
1846 * while (cond) { block }
1847 * for (init;cond;continue) { block }
1848 * This loop can be last/redo'ed etc.
1849 */
1850
1851PERL_STATIC_INLINE void
1852S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
1853{
1854 PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
1855 cx->blk_loop.my_op = cLOOP;
1856}
1857
1858
1859/* push a true for loop, i.e.
1860 * for var (list) { block }
1861 */
1862
1863PERL_STATIC_INLINE void
1864S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
1865{
1866 PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
1867
1868 /* this one line is common with cx_pushloop_plain */
1869 cx->blk_loop.my_op = cLOOP;
1870
1871 cx->blk_loop.itervar_u.svp = (SV**)itervarp;
1872 cx->blk_loop.itersave = itersave;
1873#ifdef USE_ITHREADS
1874 cx->blk_loop.oldcomppad = PL_comppad;
1875#endif
1876}
1877
1878
1879/* pop all loop types, including plain */
1880
1881PERL_STATIC_INLINE void
1882S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
1883{
1884 PERL_ARGS_ASSERT_CX_POPLOOP;
1885
1886 assert(CxTYPE_is_LOOP(cx));
1887 if ( CxTYPE(cx) == CXt_LOOP_ARY
1888 || CxTYPE(cx) == CXt_LOOP_LAZYSV)
1889 {
1890 /* Free ary or cur. This assumes that state_u.ary.ary
1891 * aligns with state_u.lazysv.cur. See cx_dup() */
1892 SV *sv = cx->blk_loop.state_u.lazysv.cur;
1893 cx->blk_loop.state_u.lazysv.cur = NULL;
1894 SvREFCNT_dec_NN(sv);
1895 if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
1896 sv = cx->blk_loop.state_u.lazysv.end;
1897 cx->blk_loop.state_u.lazysv.end = NULL;
1898 SvREFCNT_dec_NN(sv);
1899 }
1900 }
1901 if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
1902 SV *cursv;
1903 SV **svp = (cx)->blk_loop.itervar_u.svp;
1904 if ((cx->cx_type & CXp_FOR_GV))
1905 svp = &GvSV((GV*)svp);
1906 cursv = *svp;
1907 *svp = cx->blk_loop.itersave;
1908 cx->blk_loop.itersave = NULL;
1909 SvREFCNT_dec(cursv);
1910 }
1911}
1912
2a7b7c61
DM
1913
1914PERL_STATIC_INLINE void
1915S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
1916{
1917 PERL_ARGS_ASSERT_CX_PUSHWHEN;
1918
1919 cx->blk_givwhen.leave_op = cLOGOP->op_other;
1920}
1921
1922
1923PERL_STATIC_INLINE void
1924S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
1925{
1926 PERL_ARGS_ASSERT_CX_POPWHEN;
1927 assert(CxTYPE(cx) == CXt_WHEN);
1928
1929 PERL_UNUSED_ARG(cx);
59a14f30 1930 PERL_UNUSED_CONTEXT;
2a7b7c61
DM
1931 /* currently NOOP */
1932}
1933
1934
1935PERL_STATIC_INLINE void
1936S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
1937{
1938 PERL_ARGS_ASSERT_CX_PUSHGIVEN;
1939
1940 cx->blk_givwhen.leave_op = cLOGOP->op_other;
1941 cx->blk_givwhen.defsv_save = orig_defsv;
1942}
1943
1944
1945PERL_STATIC_INLINE void
1946S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
1947{
1948 SV *sv;
1949
1950 PERL_ARGS_ASSERT_CX_POPGIVEN;
1951 assert(CxTYPE(cx) == CXt_GIVEN);
1952
1953 sv = GvSV(PL_defgv);
1954 GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
1955 cx->blk_givwhen.defsv_save = NULL;
1956 SvREFCNT_dec(sv);
1957}
1958
ec2c235b
KW
1959/* ------------------ util.h ------------------------------------------- */
1960
1961/*
1962=head1 Miscellaneous Functions
1963
1964=for apidoc foldEQ
1965
1966Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1967same
1968case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes
1969match themselves and their opposite case counterparts. Non-cased and non-ASCII
1970range bytes match only themselves.
1971
1972=cut
1973*/
1974
1975PERL_STATIC_INLINE I32
1976Perl_foldEQ(const char *s1, const char *s2, I32 len)
1977{
1978 const U8 *a = (const U8 *)s1;
1979 const U8 *b = (const U8 *)s2;
1980
1981 PERL_ARGS_ASSERT_FOLDEQ;
1982
1983 assert(len >= 0);
1984
1985 while (len--) {
1986 if (*a != *b && *a != PL_fold[*b])
1987 return 0;
1988 a++,b++;
1989 }
1990 return 1;
1991}
1992
0f9cb40c 1993PERL_STATIC_INLINE I32
ec2c235b
KW
1994Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
1995{
1996 /* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on
1997 * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
1998 * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor
1999 * does it check that the strings each have at least 'len' characters */
2000
2001 const U8 *a = (const U8 *)s1;
2002 const U8 *b = (const U8 *)s2;
2003
2004 PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
2005
2006 assert(len >= 0);
2007
2008 while (len--) {
2009 if (*a != *b && *a != PL_fold_latin1[*b]) {
2010 return 0;
2011 }
2012 a++, b++;
2013 }
2014 return 1;
2015}
2016
2017/*
2018=for apidoc foldEQ_locale
2019
2020Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
2021same case-insensitively in the current locale; false otherwise.
2022
2023=cut
2024*/
2025
0f9cb40c 2026PERL_STATIC_INLINE I32
ec2c235b
KW
2027Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
2028{
2029 dVAR;
2030 const U8 *a = (const U8 *)s1;
2031 const U8 *b = (const U8 *)s2;
2032
2033 PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
2034
2035 assert(len >= 0);
2036
2037 while (len--) {
2038 if (*a != *b && *a != PL_fold_locale[*b])
2039 return 0;
2040 a++,b++;
2041 }
2042 return 1;
2043}
2044
6dba01e2
KW
2045#if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT))
2046
2047PERL_STATIC_INLINE void *
2048S_my_memrchr(const char * s, const char c, const STRLEN len)
2049{
2050 /* memrchr(), since many platforms lack it */
2051
2052 const char * t = s + len - 1;
2053
2054 PERL_ARGS_ASSERT_MY_MEMRCHR;
2055
2056 while (t >= s) {
2057 if (*t == c) {
2058 return (void *) t;
2059 }
2060 t--;
2061 }
2062
2063 return NULL;
2064}
2065
2066#endif
2067
ed382232 2068/*
c8028aa6
TC
2069 * ex: set ts=8 sts=4 sw=4 et:
2070 */