This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
inline.h: Add comment.
[perl5.git] / inline.h
CommitLineData
25468daa
FC
1/* inline.h
2 *
3 * Copyright (C) 2012 by Larry Wall and others
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
7 *
8 * This file is a home for static inline functions that cannot go in other
9 * headers files, because they depend on proto.h (included after most other
10 * headers) or struct definitions.
11 *
12 * Each section names the header file that the functions "belong" to.
13 */
27669aa4 14
be3a7a5d
KW
15/* ------------------------------- av.h ------------------------------- */
16
c70927a6 17PERL_STATIC_INLINE SSize_t
be3a7a5d
KW
18S_av_top_index(pTHX_ AV *av)
19{
20 PERL_ARGS_ASSERT_AV_TOP_INDEX;
21 assert(SvTYPE(av) == SVt_PVAV);
22
23 return AvFILL(av);
24}
25
1afe1db1
FC
26/* ------------------------------- cv.h ------------------------------- */
27
ae77754a
FC
28PERL_STATIC_INLINE GV *
29S_CvGV(pTHX_ CV *sv)
30{
31 return CvNAMED(sv)
32 ? Perl_cvgv_from_hek(aTHX_ sv)
33 : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
34}
35
1afe1db1
FC
36PERL_STATIC_INLINE I32 *
37S_CvDEPTHp(const CV * const sv)
38{
39 assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
8de47657 40 return &((XPVCV*)SvANY(sv))->xcv_depth;
1afe1db1
FC
41}
42
d16269d8
PM
43/*
44 CvPROTO returns the prototype as stored, which is not necessarily what
45 the interpreter should be using. Specifically, the interpreter assumes
46 that spaces have been stripped, which has been the case if the prototype
47 was added by toke.c, but is generally not the case if it was added elsewhere.
48 Since we can't enforce the spacelessness at assignment time, this routine
49 provides a temporary copy at parse time with spaces removed.
50 I<orig> is the start of the original buffer, I<len> is the length of the
51 prototype and will be updated when this returns.
52 */
53
5b67adb8 54#ifdef PERL_CORE
d16269d8
PM
55PERL_STATIC_INLINE char *
56S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
57{
58 SV * tmpsv;
59 char * tmps;
60 tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
61 tmps = SvPVX(tmpsv);
62 while ((*len)--) {
63 if (!isSPACE(*orig))
64 *tmps++ = *orig;
65 orig++;
66 }
67 *tmps = '\0';
68 *len = tmps - SvPVX(tmpsv);
69 return SvPVX(tmpsv);
70}
5b67adb8 71#endif
d16269d8 72
25fdce4a
FC
73/* ------------------------------- mg.h ------------------------------- */
74
75#if defined(PERL_CORE) || defined(PERL_EXT)
76/* assumes get-magic and stringification have already occurred */
77PERL_STATIC_INLINE STRLEN
78S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
79{
80 assert(mg->mg_type == PERL_MAGIC_regex_global);
81 assert(mg->mg_len != -1);
82 if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
83 return (STRLEN)mg->mg_len;
84 else {
85 const STRLEN pos = (STRLEN)mg->mg_len;
86 /* Without this check, we may read past the end of the buffer: */
87 if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
88 return sv_or_pv_pos_u2b(sv, s, pos, NULL);
89 }
90}
91#endif
92
03414f05
FC
93/* ------------------------------- pad.h ------------------------------ */
94
95#if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
96PERL_STATIC_INLINE bool
97PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
98{
99 /* is seq within the range _LOW to _HIGH ?
100 * This is complicated by the fact that PL_cop_seqmax
101 * may have wrapped around at some point */
102 if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
103 return FALSE; /* not yet introduced */
104
105 if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
106 /* in compiling scope */
107 if (
108 (seq > COP_SEQ_RANGE_LOW(pn))
109 ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
110 : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
111 )
112 return TRUE;
113 }
114 else if (
115 (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
116 ?
117 ( seq > COP_SEQ_RANGE_LOW(pn)
118 || seq <= COP_SEQ_RANGE_HIGH(pn))
119
120 : ( seq > COP_SEQ_RANGE_LOW(pn)
121 && seq <= COP_SEQ_RANGE_HIGH(pn))
122 )
123 return TRUE;
124 return FALSE;
125}
126#endif
127
33a4312b
FC
128/* ------------------------------- pp.h ------------------------------- */
129
130PERL_STATIC_INLINE I32
131S_TOPMARK(pTHX)
132{
133 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 134 "MARK top %p %" IVdf "\n",
33a4312b
FC
135 PL_markstack_ptr,
136 (IV)*PL_markstack_ptr)));
137 return *PL_markstack_ptr;
138}
139
140PERL_STATIC_INLINE I32
141S_POPMARK(pTHX)
142{
143 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 144 "MARK pop %p %" IVdf "\n",
33a4312b
FC
145 (PL_markstack_ptr-1),
146 (IV)*(PL_markstack_ptr-1))));
147 assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
148 return *PL_markstack_ptr--;
149}
150
8d919b0a
FC
151/* ----------------------------- regexp.h ----------------------------- */
152
153PERL_STATIC_INLINE struct regexp *
154S_ReANY(const REGEXP * const re)
155{
df6b4bd5 156 XPV* const p = (XPV*)SvANY(re);
8d919b0a 157 assert(isREGEXP(re));
df6b4bd5
DM
158 return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
159 : (struct regexp *)p;
8d919b0a
FC
160}
161
27669aa4
FC
162/* ------------------------------- sv.h ------------------------------- */
163
164PERL_STATIC_INLINE SV *
165S_SvREFCNT_inc(SV *sv)
166{
2439e033 167 if (LIKELY(sv != NULL))
27669aa4
FC
168 SvREFCNT(sv)++;
169 return sv;
170}
171PERL_STATIC_INLINE SV *
172S_SvREFCNT_inc_NN(SV *sv)
173{
174 SvREFCNT(sv)++;
175 return sv;
176}
177PERL_STATIC_INLINE void
178S_SvREFCNT_inc_void(SV *sv)
179{
2439e033 180 if (LIKELY(sv != NULL))
27669aa4
FC
181 SvREFCNT(sv)++;
182}
75e16a44
FC
183PERL_STATIC_INLINE void
184S_SvREFCNT_dec(pTHX_ SV *sv)
185{
2439e033 186 if (LIKELY(sv != NULL)) {
75a9bf96 187 U32 rc = SvREFCNT(sv);
79e2a32a 188 if (LIKELY(rc > 1))
75a9bf96
DM
189 SvREFCNT(sv) = rc - 1;
190 else
191 Perl_sv_free2(aTHX_ sv, rc);
75e16a44
FC
192 }
193}
541377b1
FC
194
195PERL_STATIC_INLINE void
4a9a56a7
DM
196S_SvREFCNT_dec_NN(pTHX_ SV *sv)
197{
198 U32 rc = SvREFCNT(sv);
79e2a32a 199 if (LIKELY(rc > 1))
4a9a56a7
DM
200 SvREFCNT(sv) = rc - 1;
201 else
202 Perl_sv_free2(aTHX_ sv, rc);
203}
204
205PERL_STATIC_INLINE void
541377b1
FC
206SvAMAGIC_on(SV *sv)
207{
208 assert(SvROK(sv));
209 if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
210}
211PERL_STATIC_INLINE void
212SvAMAGIC_off(SV *sv)
213{
214 if (SvROK(sv) && SvOBJECT(SvRV(sv)))
215 HvAMAGIC_off(SvSTASH(SvRV(sv)));
216}
217
218PERL_STATIC_INLINE U32
541377b1
FC
219S_SvPADSTALE_on(SV *sv)
220{
c0683843 221 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
222 return SvFLAGS(sv) |= SVs_PADSTALE;
223}
224PERL_STATIC_INLINE U32
225S_SvPADSTALE_off(SV *sv)
226{
c0683843 227 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
228 return SvFLAGS(sv) &= ~SVs_PADSTALE;
229}
25fdce4a 230#if defined(PERL_CORE) || defined (PERL_EXT)
4ddea69a 231PERL_STATIC_INLINE STRLEN
6964422a 232S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
4ddea69a 233{
25fdce4a 234 PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
4ddea69a
FC
235 if (SvGAMAGIC(sv)) {
236 U8 *hopped = utf8_hop((U8 *)pv, pos);
237 if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
238 return (STRLEN)(hopped - (U8 *)pv);
239 }
240 return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
241}
242#endif
f019c49e 243
d1decf2b
TC
244/* ------------------------------- handy.h ------------------------------- */
245
246/* saves machine code for a common noreturn idiom typically used in Newx*() */
7347ee54 247GCC_DIAG_IGNORE_DECL(-Wunused-function);
d1decf2b
TC
248static void
249S_croak_memory_wrap(void)
250{
251 Perl_croak_nocontext("%s",PL_memory_wrap);
252}
7347ee54 253GCC_DIAG_RESTORE_DECL;
d1decf2b 254
a8a2ceaa
KW
255/* ------------------------------- utf8.h ------------------------------- */
256
2fe720e2
KW
257/*
258=head1 Unicode Support
259*/
260
55d09dc8
KW
261PERL_STATIC_INLINE void
262S_append_utf8_from_native_byte(const U8 byte, U8** dest)
263{
264 /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
265 * encoded string at '*dest', updating '*dest' to include it */
266
55d09dc8
KW
267 PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
268
6f2d5cbc 269 if (NATIVE_BYTE_IS_INVARIANT(byte))
a09ec51a 270 *((*dest)++) = byte;
55d09dc8 271 else {
a09ec51a
KW
272 *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
273 *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
55d09dc8
KW
274 }
275}
276
e123187a 277/*
2fe720e2 278=for apidoc valid_utf8_to_uvchr
2717076a 279Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
2fe720e2
KW
280the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
281it passes C<L</isUTF8_CHAR>>. Surrogates, non-character code points, and
282non-Unicode code points are allowed.
283
284=cut
285
286 */
287
288PERL_STATIC_INLINE UV
289Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
290{
c41b2540 291 const UV expectlen = UTF8SKIP(s);
2fe720e2
KW
292 const U8* send = s + expectlen;
293 UV uv = *s;
294
295 PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
296
297 if (retlen) {
298 *retlen = expectlen;
299 }
300
301 /* An invariant is trivially returned */
302 if (expectlen == 1) {
303 return uv;
304 }
305
306 /* Remove the leading bits that indicate the number of bytes, leaving just
307 * the bits that are part of the value */
308 uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
309
310 /* Now, loop through the remaining bytes, accumulating each into the
311 * working total as we go. (I khw tried unrolling the loop for up to 4
312 * bytes, but there was no performance improvement) */
313 for (++s; s < send; s++) {
314 uv = UTF8_ACCUMULATE(uv, *s);
315 }
316
317 return UNI_TO_NATIVE(uv);
318
319}
320
1e599354
KW
321/*
322=for apidoc is_utf8_invariant_string
323
82c5d941 324Returns TRUE if the first C<len> bytes of the string C<s> are the same
1e599354 325regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
82c5d941
KW
326EBCDIC machines); otherwise it returns FALSE. That is, it returns TRUE if they
327are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and only
328the ASCII characters fit this definition. On EBCDIC machines, the ASCII-range
329characters are invariant, but so also are the C1 controls.
1e599354
KW
330
331If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
332use this option, that C<s> can't have embedded C<NUL> characters and has to
333have a terminating C<NUL> byte).
334
9f2abfde
KW
335See also
336C<L</is_utf8_string>>,
337C<L</is_utf8_string_flags>>,
338C<L</is_utf8_string_loc>>,
339C<L</is_utf8_string_loc_flags>>,
340C<L</is_utf8_string_loclen>>,
341C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
342C<L</is_utf8_fixed_width_buf_flags>>,
343C<L</is_utf8_fixed_width_buf_loc_flags>>,
344C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
345C<L</is_strict_utf8_string>>,
346C<L</is_strict_utf8_string_loc>>,
347C<L</is_strict_utf8_string_loclen>>,
348C<L</is_c9strict_utf8_string>>,
349C<L</is_c9strict_utf8_string_loc>>,
350and
351C<L</is_c9strict_utf8_string_loclen>>.
1e599354
KW
352
353=cut
0cbf5865
KW
354
355*/
356
357#define is_utf8_invariant_string(s, len) \
358 is_utf8_invariant_string_loc(s, len, NULL)
359
360/*
361=for apidoc is_utf8_invariant_string_loc
362
363Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
364the first UTF-8 variant character in the C<ep> pointer; if all characters are
365UTF-8 invariant, this function does not change the contents of C<*ep>.
366
367=cut
368
1e599354
KW
369*/
370
371PERL_STATIC_INLINE bool
e17544a6 372S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
1e599354 373{
e17544a6 374 const U8* send;
1e599354
KW
375 const U8* x = s;
376
0cbf5865
KW
377 PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
378
e17544a6
KW
379 if (len == 0) {
380 len = strlen((const char *)s);
381 }
382
383 send = s + len;
384
385#ifndef EBCDIC
4ab2fd9b
KW
386
387/* This looks like 0x010101... */
388#define PERL_COUNT_MULTIPLIER (~ (UINTMAX_C(0)) / 0xFF)
389
390/* This looks like 0x808080... */
391#define PERL_VARIANTS_WORD_MASK (PERL_COUNT_MULTIPLIER * 0x80)
392#define PERL_WORDSIZE sizeof(PERL_COUNT_MULTIPLIER)
393#define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1)
e17544a6 394
099e59a4
KW
395/* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1, by
396 * or'ing together the lowest bits of 'x'. Hopefully the final term gets
397 * optimized out completely on a 32-bit system, and its mask gets optimized out
398 * on a 64-bit system */
5eabe374
KW
399#define PERL_IS_SUBWORD_ADDR(x) (1 & ( PTR2nat(x) \
400 | ( PTR2nat(x) >> 1) \
401 | ( ( (PTR2nat(x) \
402 & PERL_WORD_BOUNDARY_MASK) >> 2))))
099e59a4
KW
403
404 /* Do the word-at-a-time iff there is at least one usable full word. That
405 * means that after advancing to a word boundary, there still is at least a
406 * full word left. The number of bytes needed to advance is 'wordsize -
407 * offset' unless offset is 0. */
408 if ((STRLEN) (send - x) >= PERL_WORDSIZE
409
410 /* This term is wordsize if subword; 0 if not */
411 + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
412
413 /* 'offset' */
414 - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
415 {
b40579ff 416
46bb68f6
KW
417 /* Process per-byte until reach word boundary. XXX This loop could be
418 * eliminated if we knew that this platform had fast unaligned reads */
b40579ff 419 while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
46bb68f6
KW
420 if (! UTF8_IS_INVARIANT(*x)) {
421 if (ep) {
422 *ep = x;
423 }
e17544a6 424
46bb68f6
KW
425 return FALSE;
426 }
427 x++;
e17544a6 428 }
e17544a6 429
099e59a4
KW
430 /* Here, we know we have at least one full word to process. Process
431 * per-word as long as we have at least a full word left */
432 do {
4ab2fd9b 433 if ((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) {
e17544a6 434
46bb68f6
KW
435 /* Found a variant. Just return if caller doesn't want its
436 * exact position */
437 if (! ep) {
438 return FALSE;
439 }
e17544a6 440
46bb68f6
KW
441 /* Otherwise fall into final loop to find which byte it is */
442 break;
443 }
444 x += PERL_WORDSIZE;
099e59a4 445 } while (x + PERL_WORDSIZE <= send);
b40579ff 446 }
e17544a6 447
e17544a6
KW
448#endif
449
450 /* Process per-byte */
451 while (x < send) {
452 if (! UTF8_IS_INVARIANT(*x)) {
453 if (ep) {
454 *ep = x;
455 }
0cbf5865 456
e17544a6 457 return FALSE;
0cbf5865 458 }
1e599354 459
e17544a6 460 x++;
1e599354
KW
461 }
462
463 return TRUE;
464}
465
03c1e4ab
KW
466#if defined(PERL_CORE) || defined(PERL_EXT)
467
468/*
469=for apidoc variant_under_utf8_count
470
471This function looks at the sequence of bytes between C<s> and C<e>, which are
472assumed to be encoded in ASCII/Latin1, and returns how many of them would
473change should the string be translated into UTF-8. Due to the nature of UTF-8,
474each of these would occupy two bytes instead of the single one in the input
475string. Thus, this function returns the precise number of bytes the string
476would expand by when translated to UTF-8.
477
478Unlike most of the other functions that have C<utf8> in their name, the input
479to this function is NOT a UTF-8-encoded string. The function name is slightly
480I<odd> to emphasize this.
481
482This function is internal to Perl because khw thinks that any XS code that
483would want this is probably operating too close to the internals. Presenting a
484valid use case could change that.
485
486See also
487C<L<perlapi/is_utf8_invariant_string>>
488and
489C<L<perlapi/is_utf8_invariant_string_loc>>,
490
491=cut
492
493*/
494
495PERL_STATIC_INLINE Size_t
496S_variant_under_utf8_count(const U8* const s, const U8* const e)
497{
498 const U8* x = s;
499 Size_t count = 0;
500
501 PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT;
502
503# ifndef EBCDIC
504
5d0379de
KW
505 /* Test if the string is long enough to use word-at-a-time. (Logic is the
506 * same as for is_utf8_invariant_string()) */
03c1e4ab
KW
507 if ((STRLEN) (e - x) >= PERL_WORDSIZE
508 + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
509 - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
510 {
511
512 /* Process per-byte until reach word boundary. XXX This loop could be
513 * eliminated if we knew that this platform had fast unaligned reads */
514 while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
515 count += ! UTF8_IS_INVARIANT(*x++);
516 }
517
518 /* Process per-word as long as we have at least a full word left */
74472cc2
KW
519 do { /* Commit 03c1e4ab1d6ee9062fb3f94b0ba31db6698724b1 contains an
520 explanation of how this works */
03c1e4ab
KW
521 count += ((((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) >> 7)
522 * PERL_COUNT_MULTIPLIER)
523 >> ((PERL_WORDSIZE - 1) * CHARBITS);
524 x += PERL_WORDSIZE;
525 } while (x + PERL_WORDSIZE <= e);
526 }
527
528# endif
529
530 /* Process per-byte */
531 while (x < e) {
532 if (! UTF8_IS_INVARIANT(*x)) {
533 count++;
534 }
535
536 x++;
537 }
538
539 return count;
540}
541
542#endif
543
aff4cafe
KW
544#ifndef PERL_IN_REGEXEC_C /* Keep these around for that file */
545# undef PERL_WORDSIZE
546# undef PERL_COUNT_MULTIPLIER
547# undef PERL_WORD_BOUNDARY_MASK
548# undef PERL_VARIANTS_WORD_MASK
549#endif
03c1e4ab 550
7c93d8f0 551/*
5ff889fb
KW
552=for apidoc is_utf8_string
553
82c5d941
KW
554Returns TRUE if the first C<len> bytes of string C<s> form a valid
555Perl-extended-UTF-8 string; returns FALSE otherwise. If C<len> is 0, it will
556be calculated using C<strlen(s)> (which means if you use this option, that C<s>
557can't have embedded C<NUL> characters and has to have a terminating C<NUL>
558byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
559
2717076a
KW
560This function considers Perl's extended UTF-8 to be valid. That means that
561code points above Unicode, surrogates, and non-character code points are
9f2abfde
KW
562considered valid by this function. Use C<L</is_strict_utf8_string>>,
563C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
564code points are considered valid.
5ff889fb 565
9f2abfde
KW
566See also
567C<L</is_utf8_invariant_string>>,
0cbf5865 568C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
569C<L</is_utf8_string_loc>>,
570C<L</is_utf8_string_loclen>>,
8bc127bf
KW
571C<L</is_utf8_fixed_width_buf_flags>>,
572C<L</is_utf8_fixed_width_buf_loc_flags>>,
573C<L</is_utf8_fixed_width_buf_loclen_flags>>,
5ff889fb
KW
574
575=cut
576*/
577
dd237e82 578#define is_utf8_string(s, len) is_utf8_string_loclen(s, len, NULL, NULL)
5ff889fb 579
c9cd936b
KW
580#if defined(PERL_CORE) || defined (PERL_EXT)
581
582/*
583=for apidoc is_utf8_non_invariant_string
584
585Returns TRUE if L<perlapi/is_utf8_invariant_string> returns FALSE for the first
586C<len> bytes of the string C<s>, but they are, nonetheless, legal Perl-extended
587UTF-8; otherwise returns FALSE.
588
589A TRUE return means that at least one code point represented by the sequence
590either is a wide character not representable as a single byte, or the
591representation differs depending on whether the sequence is encoded in UTF-8 or
592not.
593
594See also
595C<L<perlapi/is_utf8_invariant_string>>,
596C<L<perlapi/is_utf8_string>>
597
598=cut
599
600This is commonly used to determine if a SV's UTF-8 flag should be turned on.
601It needn't be if its string is entirely UTF-8 invariant, and it shouldn't be if
602it otherwise contains invalid UTF-8.
603
604It is an internal function because khw thinks that XS code shouldn't be working
605at this low a level. A valid use case could change that.
606
607*/
608
609PERL_STATIC_INLINE bool
610S_is_utf8_non_invariant_string(const U8* const s, STRLEN len)
611{
612 const U8 * first_variant;
613
614 PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING;
615
616 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
617 return FALSE;
618 }
619
620 return is_utf8_string(first_variant, len - (first_variant - s));
621}
622
623#endif
624
5ff889fb 625/*
9f2abfde
KW
626=for apidoc is_strict_utf8_string
627
628Returns TRUE if the first C<len> bytes of string C<s> form a valid
629UTF-8-encoded string that is fully interchangeable by any application using
630Unicode rules; otherwise it returns FALSE. If C<len> is 0, it will be
631calculated using C<strlen(s)> (which means if you use this option, that C<s>
632can't have embedded C<NUL> characters and has to have a terminating C<NUL>
633byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
634
635This function returns FALSE for strings containing any
636code points above the Unicode max of 0x10FFFF, surrogate code points, or
637non-character code points.
638
639See also
640C<L</is_utf8_invariant_string>>,
0cbf5865 641C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
642C<L</is_utf8_string>>,
643C<L</is_utf8_string_flags>>,
644C<L</is_utf8_string_loc>>,
645C<L</is_utf8_string_loc_flags>>,
646C<L</is_utf8_string_loclen>>,
647C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
648C<L</is_utf8_fixed_width_buf_flags>>,
649C<L</is_utf8_fixed_width_buf_loc_flags>>,
650C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
651C<L</is_strict_utf8_string_loc>>,
652C<L</is_strict_utf8_string_loclen>>,
653C<L</is_c9strict_utf8_string>>,
654C<L</is_c9strict_utf8_string_loc>>,
655and
656C<L</is_c9strict_utf8_string_loclen>>.
657
658=cut
659*/
660
dd237e82 661#define is_strict_utf8_string(s, len) is_strict_utf8_string_loclen(s, len, NULL, NULL)
9f2abfde
KW
662
663/*
664=for apidoc is_c9strict_utf8_string
665
666Returns TRUE if the first C<len> bytes of string C<s> form a valid
667UTF-8-encoded string that conforms to
668L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
669otherwise it returns FALSE. If C<len> is 0, it will be calculated using
670C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
671C<NUL> characters and has to have a terminating C<NUL> byte). Note that all
672characters being ASCII constitute 'a valid UTF-8 string'.
673
674This function returns FALSE for strings containing any code points above the
675Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
676code points per
677L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
678
679See also
680C<L</is_utf8_invariant_string>>,
0cbf5865 681C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
682C<L</is_utf8_string>>,
683C<L</is_utf8_string_flags>>,
684C<L</is_utf8_string_loc>>,
685C<L</is_utf8_string_loc_flags>>,
686C<L</is_utf8_string_loclen>>,
687C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
688C<L</is_utf8_fixed_width_buf_flags>>,
689C<L</is_utf8_fixed_width_buf_loc_flags>>,
690C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
691C<L</is_strict_utf8_string>>,
692C<L</is_strict_utf8_string_loc>>,
693C<L</is_strict_utf8_string_loclen>>,
694C<L</is_c9strict_utf8_string_loc>>,
695and
696C<L</is_c9strict_utf8_string_loclen>>.
697
698=cut
699*/
700
dd237e82 701#define is_c9strict_utf8_string(s, len) is_c9strict_utf8_string_loclen(s, len, NULL, 0)
9f2abfde
KW
702
703/*
704=for apidoc is_utf8_string_flags
705
706Returns TRUE if the first C<len> bytes of string C<s> form a valid
707UTF-8 string, subject to the restrictions imposed by C<flags>;
708returns FALSE otherwise. If C<len> is 0, it will be calculated
709using C<strlen(s)> (which means if you use this option, that C<s> can't have
710embedded C<NUL> characters and has to have a terminating C<NUL> byte). Note
711that all characters being ASCII constitute 'a valid UTF-8 string'.
712
713If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
714C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
715as C<L</is_strict_utf8_string>>; and if C<flags> is
716C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
717C<L</is_c9strict_utf8_string>>. Otherwise C<flags> may be any
718combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
719C<L</utf8n_to_uvchr>>, with the same meanings.
720
721See also
722C<L</is_utf8_invariant_string>>,
0cbf5865 723C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
724C<L</is_utf8_string>>,
725C<L</is_utf8_string_loc>>,
726C<L</is_utf8_string_loc_flags>>,
727C<L</is_utf8_string_loclen>>,
728C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
729C<L</is_utf8_fixed_width_buf_flags>>,
730C<L</is_utf8_fixed_width_buf_loc_flags>>,
731C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
732C<L</is_strict_utf8_string>>,
733C<L</is_strict_utf8_string_loc>>,
734C<L</is_strict_utf8_string_loclen>>,
735C<L</is_c9strict_utf8_string>>,
736C<L</is_c9strict_utf8_string_loc>>,
737and
738C<L</is_c9strict_utf8_string_loclen>>.
739
740=cut
741*/
742
743PERL_STATIC_INLINE bool
f60f61fd 744S_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags)
9f2abfde 745{
33756530 746 const U8 * first_variant;
9f2abfde
KW
747
748 PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
749 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 750 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 751
f60f61fd
KW
752 if (len == 0) {
753 len = strlen((const char *)s);
754 }
755
9f2abfde
KW
756 if (flags == 0) {
757 return is_utf8_string(s, len);
758 }
759
d044b7a7 760 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
761 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
762 {
763 return is_strict_utf8_string(s, len);
764 }
765
d044b7a7 766 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
767 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
768 {
769 return is_c9strict_utf8_string(s, len);
770 }
771
33756530
KW
772 if (! is_utf8_invariant_string_loc(s, len, &first_variant)) {
773 const U8* const send = s + len;
774 const U8* x = first_variant;
775
a0d7f935
KW
776 while (x < send) {
777 STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
778 if (UNLIKELY(! cur_len)) {
779 return FALSE;
780 }
781 x += cur_len;
9f2abfde 782 }
33756530 783 }
9f2abfde
KW
784
785 return TRUE;
786}
787
788/*
5ff889fb
KW
789
790=for apidoc is_utf8_string_loc
791
2717076a 792Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 793case of "utf8ness failure") or the location C<s>+C<len> (in the case of
82c5d941 794"utf8ness success") in the C<ep> pointer.
5ff889fb 795
2717076a 796See also C<L</is_utf8_string_loclen>>.
5ff889fb 797
3964c812
KW
798=cut
799*/
800
801#define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0)
802
803/*
804
5ff889fb
KW
805=for apidoc is_utf8_string_loclen
806
2717076a 807Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 808case of "utf8ness failure") or the location C<s>+C<len> (in the case of
9f2abfde 809"utf8ness success") in the C<ep> pointer, and the number of UTF-8
82c5d941 810encoded characters in the C<el> pointer.
5ff889fb 811
2717076a 812See also C<L</is_utf8_string_loc>>.
5ff889fb
KW
813
814=cut
815*/
816
56e4cf64 817PERL_STATIC_INLINE bool
33756530 818Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
5ff889fb 819{
33756530 820 const U8 * first_variant;
5ff889fb
KW
821
822 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
823
33756530
KW
824 if (len == 0) {
825 len = strlen((const char *) s);
826 }
827
828 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
829 if (el)
830 *el = len;
831
832 if (ep) {
833 *ep = s + len;
834 }
835
836 return TRUE;
837 }
838
839 {
840 const U8* const send = s + len;
841 const U8* x = first_variant;
842 STRLEN outlen = first_variant - s;
843
a0d7f935
KW
844 while (x < send) {
845 const STRLEN cur_len = isUTF8_CHAR(x, send);
846 if (UNLIKELY(! cur_len)) {
847 break;
848 }
849 x += cur_len;
850 outlen++;
5ff889fb 851 }
5ff889fb 852
a0d7f935
KW
853 if (el)
854 *el = outlen;
5ff889fb 855
a0d7f935
KW
856 if (ep) {
857 *ep = x;
858 }
5ff889fb 859
a0d7f935 860 return (x == send);
33756530 861 }
5ff889fb
KW
862}
863
864/*
9f2abfde
KW
865
866=for apidoc is_strict_utf8_string_loc
867
868Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
869case of "utf8ness failure") or the location C<s>+C<len> (in the case of
870"utf8ness success") in the C<ep> pointer.
871
872See also C<L</is_strict_utf8_string_loclen>>.
873
874=cut
875*/
876
877#define is_strict_utf8_string_loc(s, len, ep) \
878 is_strict_utf8_string_loclen(s, len, ep, 0)
879
880/*
881
882=for apidoc is_strict_utf8_string_loclen
883
884Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
885case of "utf8ness failure") or the location C<s>+C<len> (in the case of
886"utf8ness success") in the C<ep> pointer, and the number of UTF-8
887encoded characters in the C<el> pointer.
888
889See also C<L</is_strict_utf8_string_loc>>.
890
891=cut
892*/
893
894PERL_STATIC_INLINE bool
33756530 895S_is_strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
9f2abfde 896{
33756530 897 const U8 * first_variant;
9f2abfde
KW
898
899 PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
900
33756530
KW
901 if (len == 0) {
902 len = strlen((const char *) s);
903 }
904
905 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
906 if (el)
907 *el = len;
908
909 if (ep) {
910 *ep = s + len;
911 }
912
913 return TRUE;
914 }
915
916 {
917 const U8* const send = s + len;
918 const U8* x = first_variant;
919 STRLEN outlen = first_variant - s;
920
a0d7f935
KW
921 while (x < send) {
922 const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
923 if (UNLIKELY(! cur_len)) {
924 break;
925 }
926 x += cur_len;
927 outlen++;
9f2abfde 928 }
9f2abfde 929
a0d7f935
KW
930 if (el)
931 *el = outlen;
9f2abfde 932
a0d7f935
KW
933 if (ep) {
934 *ep = x;
935 }
9f2abfde 936
a0d7f935 937 return (x == send);
33756530 938 }
9f2abfde
KW
939}
940
941/*
942
943=for apidoc is_c9strict_utf8_string_loc
944
945Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
946the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
947"utf8ness success") in the C<ep> pointer.
948
949See also C<L</is_c9strict_utf8_string_loclen>>.
950
951=cut
952*/
953
954#define is_c9strict_utf8_string_loc(s, len, ep) \
955 is_c9strict_utf8_string_loclen(s, len, ep, 0)
956
957/*
958
959=for apidoc is_c9strict_utf8_string_loclen
960
961Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
962the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
963"utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
964characters in the C<el> pointer.
965
966See also C<L</is_c9strict_utf8_string_loc>>.
967
968=cut
969*/
970
971PERL_STATIC_INLINE bool
33756530 972S_is_c9strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
9f2abfde 973{
33756530 974 const U8 * first_variant;
9f2abfde
KW
975
976 PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
977
33756530
KW
978 if (len == 0) {
979 len = strlen((const char *) s);
980 }
981
982 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
983 if (el)
984 *el = len;
985
986 if (ep) {
987 *ep = s + len;
988 }
989
990 return TRUE;
991 }
992
993 {
994 const U8* const send = s + len;
995 const U8* x = first_variant;
996 STRLEN outlen = first_variant - s;
997
a0d7f935
KW
998 while (x < send) {
999 const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
1000 if (UNLIKELY(! cur_len)) {
1001 break;
1002 }
1003 x += cur_len;
1004 outlen++;
9f2abfde 1005 }
9f2abfde 1006
a0d7f935
KW
1007 if (el)
1008 *el = outlen;
9f2abfde 1009
a0d7f935
KW
1010 if (ep) {
1011 *ep = x;
1012 }
9f2abfde 1013
a0d7f935 1014 return (x == send);
33756530 1015 }
9f2abfde
KW
1016}
1017
1018/*
1019
1020=for apidoc is_utf8_string_loc_flags
1021
1022Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1023case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1024"utf8ness success") in the C<ep> pointer.
1025
1026See also C<L</is_utf8_string_loclen_flags>>.
1027
1028=cut
1029*/
1030
1031#define is_utf8_string_loc_flags(s, len, ep, flags) \
1032 is_utf8_string_loclen_flags(s, len, ep, 0, flags)
1033
1034
1035/* The above 3 actual functions could have been moved into the more general one
1036 * just below, and made #defines that call it with the right 'flags'. They are
1037 * currently kept separate to increase their chances of getting inlined */
1038
1039/*
1040
1041=for apidoc is_utf8_string_loclen_flags
1042
1043Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1044case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1045"utf8ness success") in the C<ep> pointer, and the number of UTF-8
1046encoded characters in the C<el> pointer.
1047
1048See also C<L</is_utf8_string_loc_flags>>.
1049
1050=cut
1051*/
1052
1053PERL_STATIC_INLINE bool
f60f61fd 1054S_is_utf8_string_loclen_flags(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
9f2abfde 1055{
33756530 1056 const U8 * first_variant;
9f2abfde
KW
1057
1058 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
1059 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 1060 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 1061
f60f61fd 1062 if (len == 0) {
a0d7f935 1063 len = strlen((const char *) s);
f60f61fd
KW
1064 }
1065
9f2abfde
KW
1066 if (flags == 0) {
1067 return is_utf8_string_loclen(s, len, ep, el);
1068 }
1069
d044b7a7 1070 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
1071 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
1072 {
1073 return is_strict_utf8_string_loclen(s, len, ep, el);
1074 }
1075
d044b7a7 1076 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
1077 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
1078 {
1079 return is_c9strict_utf8_string_loclen(s, len, ep, el);
1080 }
1081
33756530
KW
1082 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1083 if (el)
1084 *el = len;
1085
1086 if (ep) {
1087 *ep = s + len;
1088 }
1089
1090 return TRUE;
1091 }
1092
1093 {
1094 const U8* send = s + len;
1095 const U8* x = first_variant;
1096 STRLEN outlen = first_variant - s;
1097
a0d7f935
KW
1098 while (x < send) {
1099 const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
1100 if (UNLIKELY(! cur_len)) {
1101 break;
1102 }
1103 x += cur_len;
1104 outlen++;
9f2abfde 1105 }
9f2abfde 1106
a0d7f935
KW
1107 if (el)
1108 *el = outlen;
9f2abfde 1109
a0d7f935
KW
1110 if (ep) {
1111 *ep = x;
1112 }
9f2abfde 1113
a0d7f935 1114 return (x == send);
33756530 1115 }
9f2abfde
KW
1116}
1117
1118/*
7c93d8f0
KW
1119=for apidoc utf8_distance
1120
1121Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
1122and C<b>.
1123
1124WARNING: use only if you *know* that the pointers point inside the
1125same UTF-8 buffer.
1126
1127=cut
1128*/
1129
1130PERL_STATIC_INLINE IV
1131Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
1132{
1133 PERL_ARGS_ASSERT_UTF8_DISTANCE;
1134
1135 return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
1136}
1137
1138/*
1139=for apidoc utf8_hop
1140
1141Return the UTF-8 pointer C<s> displaced by C<off> characters, either
1142forward or backward.
1143
1144WARNING: do not use the following unless you *know* C<off> is within
1145the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
1146on the first byte of character or just after the last byte of a character.
1147
1148=cut
1149*/
1150
1151PERL_STATIC_INLINE U8 *
1152Perl_utf8_hop(const U8 *s, SSize_t off)
1153{
1154 PERL_ARGS_ASSERT_UTF8_HOP;
1155
1156 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1157 * the bitops (especially ~) can create illegal UTF-8.
1158 * In other words: in Perl UTF-8 is not just for Unicode. */
1159
1160 if (off >= 0) {
1161 while (off--)
1162 s += UTF8SKIP(s);
1163 }
1164 else {
1165 while (off++) {
1166 s--;
1167 while (UTF8_IS_CONTINUATION(*s))
1168 s--;
1169 }
1170 }
7347ee54 1171 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
7c93d8f0 1172 return (U8 *)s;
7347ee54 1173 GCC_DIAG_RESTORE_STMT;
7c93d8f0
KW
1174}
1175
4dab108f 1176/*
65df57a8
TC
1177=for apidoc utf8_hop_forward
1178
1179Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1180forward.
1181
1182C<off> must be non-negative.
1183
1184C<s> must be before or equal to C<end>.
1185
1186When moving forward it will not move beyond C<end>.
1187
1188Will not exceed this limit even if the string is not valid "UTF-8".
1189
1190=cut
1191*/
1192
1193PERL_STATIC_INLINE U8 *
1194Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
1195{
1196 PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
1197
1198 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1199 * the bitops (especially ~) can create illegal UTF-8.
1200 * In other words: in Perl UTF-8 is not just for Unicode. */
1201
1202 assert(s <= end);
1203 assert(off >= 0);
1204
1205 while (off--) {
1206 STRLEN skip = UTF8SKIP(s);
de979548 1207 if ((STRLEN)(end - s) <= skip) {
7347ee54 1208 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1209 return (U8 *)end;
7347ee54 1210 GCC_DIAG_RESTORE_STMT;
de979548 1211 }
65df57a8
TC
1212 s += skip;
1213 }
1214
7347ee54 1215 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1216 return (U8 *)s;
7347ee54 1217 GCC_DIAG_RESTORE_STMT;
65df57a8
TC
1218}
1219
1220/*
1221=for apidoc utf8_hop_back
1222
1223Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1224backward.
1225
1226C<off> must be non-positive.
1227
1228C<s> must be after or equal to C<start>.
1229
1230When moving backward it will not move before C<start>.
1231
1232Will not exceed this limit even if the string is not valid "UTF-8".
1233
1234=cut
1235*/
1236
1237PERL_STATIC_INLINE U8 *
1238Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
1239{
1240 PERL_ARGS_ASSERT_UTF8_HOP_BACK;
1241
1242 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1243 * the bitops (especially ~) can create illegal UTF-8.
1244 * In other words: in Perl UTF-8 is not just for Unicode. */
1245
1246 assert(start <= s);
1247 assert(off <= 0);
1248
1249 while (off++ && s > start) {
1250 s--;
1251 while (UTF8_IS_CONTINUATION(*s) && s > start)
1252 s--;
1253 }
1254
7347ee54 1255 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1256 return (U8 *)s;
7347ee54 1257 GCC_DIAG_RESTORE_STMT;
65df57a8
TC
1258}
1259
1260/*
1261=for apidoc utf8_hop_safe
1262
1263Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1264either forward or backward.
1265
1266When moving backward it will not move before C<start>.
1267
1268When moving forward it will not move beyond C<end>.
1269
1270Will not exceed those limits even if the string is not valid "UTF-8".
1271
1272=cut
1273*/
1274
1275PERL_STATIC_INLINE U8 *
1276Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
1277{
1278 PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
1279
1280 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1281 * the bitops (especially ~) can create illegal UTF-8.
1282 * In other words: in Perl UTF-8 is not just for Unicode. */
1283
1284 assert(start <= s && s <= end);
1285
1286 if (off >= 0) {
1287 return utf8_hop_forward(s, off, end);
1288 }
1289 else {
1290 return utf8_hop_back(s, off, start);
1291 }
1292}
1293
1294/*
4dab108f
KW
1295
1296=for apidoc is_utf8_valid_partial_char
1297
6cbb9248
KW
1298Returns 0 if the sequence of bytes starting at C<s> and looking no further than
1299S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
1300points. Otherwise, it returns 1 if there exists at least one non-empty
1301sequence of bytes that when appended to sequence C<s>, starting at position
1302C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
1303otherwise returns 0.
1304
1305In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
1306point.
1307
1308This is useful when a fixed-length buffer is being tested for being well-formed
1309UTF-8, but the final few bytes in it don't comprise a full character; that is,
1310it is split somewhere in the middle of the final code point's UTF-8
1311representation. (Presumably when the buffer is refreshed with the next chunk
1312of data, the new first bytes will complete the partial code point.) This
1313function is used to verify that the final bytes in the current buffer are in
1314fact the legal beginning of some code point, so that if they aren't, the
1315failure can be signalled without having to wait for the next read.
4dab108f
KW
1316
1317=cut
1318*/
2717076a
KW
1319#define is_utf8_valid_partial_char(s, e) \
1320 is_utf8_valid_partial_char_flags(s, e, 0)
f1c999a7
KW
1321
1322/*
1323
1324=for apidoc is_utf8_valid_partial_char_flags
1325
1326Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
1327or not the input is a valid UTF-8 encoded partial character, but it takes an
1328extra parameter, C<flags>, which can further restrict which code points are
1329considered valid.
1330
1331If C<flags> is 0, this behaves identically to
1332C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
1333of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
1334there is any sequence of bytes that can complete the input partial character in
1335such a way that a non-prohibited character is formed, the function returns
2717076a
KW
1336TRUE; otherwise FALSE. Non character code points cannot be determined based on
1337partial character input. But many of the other possible excluded types can be
f1c999a7
KW
1338determined from just the first one or two bytes.
1339
1340=cut
1341 */
1342
56e4cf64 1343PERL_STATIC_INLINE bool
f1c999a7 1344S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
4dab108f 1345{
f1c999a7 1346 PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
4dab108f 1347
f1c999a7 1348 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 1349 |UTF8_DISALLOW_PERL_EXTENDED)));
4dab108f 1350
8875bd48 1351 if (s >= e || s + UTF8SKIP(s) <= e) {
4dab108f
KW
1352 return FALSE;
1353 }
1354
f1c999a7 1355 return cBOOL(_is_utf8_char_helper(s, e, flags));
4dab108f
KW
1356}
1357
8bc127bf
KW
1358/*
1359
1360=for apidoc is_utf8_fixed_width_buf_flags
1361
1362Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
1363is entirely valid UTF-8, subject to the restrictions given by C<flags>;
1364otherwise it returns FALSE.
1365
1366If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
1367without restriction. If the final few bytes of the buffer do not form a
1368complete code point, this will return TRUE anyway, provided that
1369C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
1370
1371If C<flags> in non-zero, it can be any combination of the
1372C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
1373same meanings.
1374
1375This function differs from C<L</is_utf8_string_flags>> only in that the latter
1376returns FALSE if the final few bytes of the string don't form a complete code
1377point.
1378
1379=cut
1380 */
1381#define is_utf8_fixed_width_buf_flags(s, len, flags) \
1382 is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
1383
1384/*
1385
1386=for apidoc is_utf8_fixed_width_buf_loc_flags
1387
1388Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
1389failure in the C<ep> pointer. If the function returns TRUE, C<*ep> will point
1390to the beginning of any partial character at the end of the buffer; if there is
1391no partial character C<*ep> will contain C<s>+C<len>.
1392
1393See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
1394
1395=cut
1396*/
1397
1398#define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \
1399 is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
1400
1401/*
1402
1403=for apidoc is_utf8_fixed_width_buf_loclen_flags
1404
1405Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
1406complete, valid characters found in the C<el> pointer.
1407
1408=cut
1409*/
1410
1411PERL_STATIC_INLINE bool
1412S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
33756530 1413 STRLEN len,
8bc127bf
KW
1414 const U8 **ep,
1415 STRLEN *el,
1416 const U32 flags)
1417{
1418 const U8 * maybe_partial;
1419
1420 PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
1421
1422 if (! ep) {
1423 ep = &maybe_partial;
1424 }
1425
1426 /* If it's entirely valid, return that; otherwise see if the only error is
1427 * that the final few bytes are for a partial character */
1428 return is_utf8_string_loclen_flags(s, len, ep, el, flags)
1429 || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
1430}
1431
c8028aa6
TC
1432/* ------------------------------- perl.h ----------------------------- */
1433
1434/*
dcccc8ff
KW
1435=head1 Miscellaneous Functions
1436
41188aa0 1437=for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
c8028aa6 1438
6602b933 1439Test that the given C<pv> doesn't contain any internal C<NUL> characters.
796b6530 1440If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
c8028aa6
TC
1441
1442Return TRUE if the name is safe.
1443
796b6530 1444Used by the C<IS_SAFE_SYSCALL()> macro.
c8028aa6
TC
1445
1446=cut
1447*/
1448
1449PERL_STATIC_INLINE bool
41188aa0 1450S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
c8028aa6
TC
1451 /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
1452 * perl itself uses xce*() functions which accept 8-bit strings.
1453 */
1454
1455 PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
1456
6c4650b3 1457 if (len > 1) {
c8028aa6 1458 char *null_at;
41188aa0 1459 if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
c8028aa6 1460 SETERRNO(ENOENT, LIB_INVARG);
1d505182 1461 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
c8028aa6 1462 "Invalid \\0 character in %s for %s: %s\\0%s",
41188aa0 1463 what, op_name, pv, null_at+1);
c8028aa6
TC
1464 return FALSE;
1465 }
1466 }
1467
1468 return TRUE;
1469}
1470
1471/*
7cb3f959
TC
1472
1473Return true if the supplied filename has a newline character
fa6c7d00 1474immediately before the first (hopefully only) NUL.
7cb3f959
TC
1475
1476My original look at this incorrectly used the len from SvPV(), but
1477that's incorrect, since we allow for a NUL in pv[len-1].
1478
1479So instead, strlen() and work from there.
1480
1481This allow for the user reading a filename, forgetting to chomp it,
1482then calling:
1483
1484 open my $foo, "$file\0";
1485
1486*/
1487
1488#ifdef PERL_CORE
1489
1490PERL_STATIC_INLINE bool
1491S_should_warn_nl(const char *pv) {
1492 STRLEN len;
1493
1494 PERL_ARGS_ASSERT_SHOULD_WARN_NL;
1495
1496 len = strlen(pv);
1497
1498 return len > 0 && pv[len-1] == '\n';
1499}
1500
1501#endif
1502
81d52ecd
JH
1503/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
1504
1505#define MAX_CHARSET_NAME_LENGTH 2
1506
1507PERL_STATIC_INLINE const char *
1508get_regex_charset_name(const U32 flags, STRLEN* const lenp)
1509{
1510 /* Returns a string that corresponds to the name of the regex character set
1511 * given by 'flags', and *lenp is set the length of that string, which
1512 * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
1513
1514 *lenp = 1;
1515 switch (get_regex_charset(flags)) {
1516 case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
1517 case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
1518 case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
1519 case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
1520 case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
1521 *lenp = 2;
1522 return ASCII_MORE_RESTRICT_PAT_MODS;
1523 }
1524 /* The NOT_REACHED; hides an assert() which has a rather complex
1525 * definition in perl.h. */
1526 NOT_REACHED; /* NOTREACHED */
1527 return "?"; /* Unknown */
1528}
1529
7cb3f959 1530/*
ed382232
TC
1531
1532Return false if any get magic is on the SV other than taint magic.
1533
1534*/
1535
1536PERL_STATIC_INLINE bool
1537S_sv_only_taint_gmagic(SV *sv) {
1538 MAGIC *mg = SvMAGIC(sv);
1539
1540 PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
1541
1542 while (mg) {
1543 if (mg->mg_type != PERL_MAGIC_taint
1544 && !(mg->mg_flags & MGf_GSKIP)
1545 && mg->mg_virtual->svt_get) {
1546 return FALSE;
1547 }
1548 mg = mg->mg_moremagic;
1549 }
1550
1551 return TRUE;
1552}
1553
ed8ff0f3
DM
1554/* ------------------ cop.h ------------------------------------------- */
1555
1556
1557/* Enter a block. Push a new base context and return its address. */
1558
1559PERL_STATIC_INLINE PERL_CONTEXT *
1560S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
1561{
1562 PERL_CONTEXT * cx;
1563
1564 PERL_ARGS_ASSERT_CX_PUSHBLOCK;
1565
1566 CXINC;
1567 cx = CX_CUR();
1568 cx->cx_type = type;
1569 cx->blk_gimme = gimme;
1570 cx->blk_oldsaveix = saveix;
4caf7d8c 1571 cx->blk_oldsp = (I32)(sp - PL_stack_base);
ed8ff0f3 1572 cx->blk_oldcop = PL_curcop;
4caf7d8c 1573 cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
ed8ff0f3
DM
1574 cx->blk_oldscopesp = PL_scopestack_ix;
1575 cx->blk_oldpm = PL_curpm;
ce8bb8d8 1576 cx->blk_old_tmpsfloor = PL_tmps_floor;
ed8ff0f3
DM
1577
1578 PL_tmps_floor = PL_tmps_ix;
1579 CX_DEBUG(cx, "PUSH");
1580 return cx;
1581}
1582
1583
1584/* Exit a block (RETURN and LAST). */
1585
1586PERL_STATIC_INLINE void
1587S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
1588{
1589 PERL_ARGS_ASSERT_CX_POPBLOCK;
1590
1591 CX_DEBUG(cx, "POP");
1592 /* these 3 are common to cx_popblock and cx_topblock */
1593 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1594 PL_scopestack_ix = cx->blk_oldscopesp;
1595 PL_curpm = cx->blk_oldpm;
1596
1597 /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
1598 * and leaves a CX entry lying around for repeated use, so
1599 * skip for multicall */ \
1600 assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
1601 || PL_savestack_ix == cx->blk_oldsaveix);
1602 PL_curcop = cx->blk_oldcop;
ce8bb8d8 1603 PL_tmps_floor = cx->blk_old_tmpsfloor;
ed8ff0f3
DM
1604}
1605
1606/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
1607 * Whereas cx_popblock() restores the state to the point just before
1608 * cx_pushblock() was called, cx_topblock() restores it to the point just
1609 * *after* cx_pushblock() was called. */
1610
1611PERL_STATIC_INLINE void
1612S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
1613{
1614 PERL_ARGS_ASSERT_CX_TOPBLOCK;
1615
1616 CX_DEBUG(cx, "TOP");
1617 /* these 3 are common to cx_popblock and cx_topblock */
1618 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1619 PL_scopestack_ix = cx->blk_oldscopesp;
1620 PL_curpm = cx->blk_oldpm;
1621
1622 PL_stack_sp = PL_stack_base + cx->blk_oldsp;
1623}
1624
1625
a73d8813
DM
1626PERL_STATIC_INLINE void
1627S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
1628{
1629 U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
1630
1631 PERL_ARGS_ASSERT_CX_PUSHSUB;
1632
3f6bd23a 1633 PERL_DTRACE_PROBE_ENTRY(cv);
a73d8813
DM
1634 cx->blk_sub.cv = cv;
1635 cx->blk_sub.olddepth = CvDEPTH(cv);
1636 cx->blk_sub.prevcomppad = PL_comppad;
1637 cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
1638 cx->blk_sub.retop = retop;
1639 SvREFCNT_inc_simple_void_NN(cv);
1640 cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
1641}
1642
1643
1644/* subsets of cx_popsub() */
1645
1646PERL_STATIC_INLINE void
1647S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
1648{
1649 CV *cv;
1650
1651 PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
1652 assert(CxTYPE(cx) == CXt_SUB);
1653
1654 PL_comppad = cx->blk_sub.prevcomppad;
1655 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1656 cv = cx->blk_sub.cv;
1657 CvDEPTH(cv) = cx->blk_sub.olddepth;
1658 cx->blk_sub.cv = NULL;
1659 SvREFCNT_dec(cv);
1660}
1661
1662
1663/* handle the @_ part of leaving a sub */
1664
1665PERL_STATIC_INLINE void
1666S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
1667{
1668 AV *av;
1669
1670 PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
1671 assert(CxTYPE(cx) == CXt_SUB);
1672 assert(AvARRAY(MUTABLE_AV(
1673 PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
1674 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
1675
1676 CX_POP_SAVEARRAY(cx);
1677 av = MUTABLE_AV(PAD_SVl(0));
1678 if (UNLIKELY(AvREAL(av)))
1679 /* abandon @_ if it got reified */
1680 clear_defarray(av, 0);
1681 else {
1682 CLEAR_ARGARRAY(av);
1683 }
1684}
1685
1686
1687PERL_STATIC_INLINE void
1688S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
1689{
1690 PERL_ARGS_ASSERT_CX_POPSUB;
1691 assert(CxTYPE(cx) == CXt_SUB);
1692
3f6bd23a 1693 PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
a73d8813
DM
1694
1695 if (CxHASARGS(cx))
1696 cx_popsub_args(cx);
1697 cx_popsub_common(cx);
1698}
1699
1700
6a7d52cc
DM
1701PERL_STATIC_INLINE void
1702S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
1703{
1704 PERL_ARGS_ASSERT_CX_PUSHFORMAT;
1705
1706 cx->blk_format.cv = cv;
1707 cx->blk_format.retop = retop;
1708 cx->blk_format.gv = gv;
1709 cx->blk_format.dfoutgv = PL_defoutgv;
1710 cx->blk_format.prevcomppad = PL_comppad;
1711 cx->blk_u16 = 0;
1712
1713 SvREFCNT_inc_simple_void_NN(cv);
1714 CvDEPTH(cv)++;
1715 SvREFCNT_inc_void(cx->blk_format.dfoutgv);
1716}
1717
1718
1719PERL_STATIC_INLINE void
1720S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
1721{
1722 CV *cv;
1723 GV *dfout;
1724
1725 PERL_ARGS_ASSERT_CX_POPFORMAT;
1726 assert(CxTYPE(cx) == CXt_FORMAT);
1727
1728 dfout = cx->blk_format.dfoutgv;
1729 setdefout(dfout);
1730 cx->blk_format.dfoutgv = NULL;
1731 SvREFCNT_dec_NN(dfout);
1732
1733 PL_comppad = cx->blk_format.prevcomppad;
1734 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1735 cv = cx->blk_format.cv;
1736 cx->blk_format.cv = NULL;
1737 --CvDEPTH(cv);
1738 SvREFCNT_dec_NN(cv);
1739}
1740
1741
13febba5
DM
1742PERL_STATIC_INLINE void
1743S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
1744{
1745 PERL_ARGS_ASSERT_CX_PUSHEVAL;
1746
1747 cx->blk_eval.retop = retop;
1748 cx->blk_eval.old_namesv = namesv;
1749 cx->blk_eval.old_eval_root = PL_eval_root;
1750 cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
1751 cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
1752 cx->blk_eval.cur_top_env = PL_top_env;
1753
4c57ced5 1754 assert(!(PL_in_eval & ~ 0x3F));
13febba5 1755 assert(!(PL_op->op_type & ~0x1FF));
4c57ced5 1756 cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
13febba5
DM
1757}
1758
1759
1760PERL_STATIC_INLINE void
1761S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
1762{
1763 SV *sv;
1764
1765 PERL_ARGS_ASSERT_CX_POPEVAL;
1766 assert(CxTYPE(cx) == CXt_EVAL);
1767
1768 PL_in_eval = CxOLD_IN_EVAL(cx);
4c57ced5 1769 assert(!(PL_in_eval & 0xc0));
13febba5
DM
1770 PL_eval_root = cx->blk_eval.old_eval_root;
1771 sv = cx->blk_eval.cur_text;
4c57ced5 1772 if (sv && CxEVAL_TXT_REFCNTED(cx)) {
13febba5
DM
1773 cx->blk_eval.cur_text = NULL;
1774 SvREFCNT_dec_NN(sv);
1775 }
1776
1777 sv = cx->blk_eval.old_namesv;
2a1e0dfe
DM
1778 if (sv) {
1779 cx->blk_eval.old_namesv = NULL;
1780 SvREFCNT_dec_NN(sv);
1781 }
13febba5 1782}
6a7d52cc 1783
a73d8813 1784
d1b6bf72
DM
1785/* push a plain loop, i.e.
1786 * { block }
1787 * while (cond) { block }
1788 * for (init;cond;continue) { block }
1789 * This loop can be last/redo'ed etc.
1790 */
1791
1792PERL_STATIC_INLINE void
1793S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
1794{
1795 PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
1796 cx->blk_loop.my_op = cLOOP;
1797}
1798
1799
1800/* push a true for loop, i.e.
1801 * for var (list) { block }
1802 */
1803
1804PERL_STATIC_INLINE void
1805S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
1806{
1807 PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
1808
1809 /* this one line is common with cx_pushloop_plain */
1810 cx->blk_loop.my_op = cLOOP;
1811
1812 cx->blk_loop.itervar_u.svp = (SV**)itervarp;
1813 cx->blk_loop.itersave = itersave;
1814#ifdef USE_ITHREADS
1815 cx->blk_loop.oldcomppad = PL_comppad;
1816#endif
1817}
1818
1819
1820/* pop all loop types, including plain */
1821
1822PERL_STATIC_INLINE void
1823S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
1824{
1825 PERL_ARGS_ASSERT_CX_POPLOOP;
1826
1827 assert(CxTYPE_is_LOOP(cx));
1828 if ( CxTYPE(cx) == CXt_LOOP_ARY
1829 || CxTYPE(cx) == CXt_LOOP_LAZYSV)
1830 {
1831 /* Free ary or cur. This assumes that state_u.ary.ary
1832 * aligns with state_u.lazysv.cur. See cx_dup() */
1833 SV *sv = cx->blk_loop.state_u.lazysv.cur;
1834 cx->blk_loop.state_u.lazysv.cur = NULL;
1835 SvREFCNT_dec_NN(sv);
1836 if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
1837 sv = cx->blk_loop.state_u.lazysv.end;
1838 cx->blk_loop.state_u.lazysv.end = NULL;
1839 SvREFCNT_dec_NN(sv);
1840 }
1841 }
1842 if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
1843 SV *cursv;
1844 SV **svp = (cx)->blk_loop.itervar_u.svp;
1845 if ((cx->cx_type & CXp_FOR_GV))
1846 svp = &GvSV((GV*)svp);
1847 cursv = *svp;
1848 *svp = cx->blk_loop.itersave;
1849 cx->blk_loop.itersave = NULL;
1850 SvREFCNT_dec(cursv);
1851 }
1852}
1853
2a7b7c61
DM
1854
1855PERL_STATIC_INLINE void
7896dde7 1856S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
2a7b7c61 1857{
7896dde7 1858 PERL_ARGS_ASSERT_CX_PUSHWHEN;
2a7b7c61 1859
7896dde7 1860 cx->blk_givwhen.leave_op = cLOGOP->op_other;
2a7b7c61
DM
1861}
1862
1863
1864PERL_STATIC_INLINE void
7896dde7 1865S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
2a7b7c61 1866{
7896dde7
Z
1867 PERL_ARGS_ASSERT_CX_POPWHEN;
1868 assert(CxTYPE(cx) == CXt_WHEN);
2a7b7c61
DM
1869
1870 PERL_UNUSED_ARG(cx);
59a14f30 1871 PERL_UNUSED_CONTEXT;
2a7b7c61
DM
1872 /* currently NOOP */
1873}
1874
1875
7896dde7
Z
1876PERL_STATIC_INLINE void
1877S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
1878{
1879 PERL_ARGS_ASSERT_CX_PUSHGIVEN;
1880
1881 cx->blk_givwhen.leave_op = cLOGOP->op_other;
1882 cx->blk_givwhen.defsv_save = orig_defsv;
1883}
1884
1885
1886PERL_STATIC_INLINE void
1887S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
1888{
1889 SV *sv;
1890
1891 PERL_ARGS_ASSERT_CX_POPGIVEN;
1892 assert(CxTYPE(cx) == CXt_GIVEN);
1893
1894 sv = GvSV(PL_defgv);
1895 GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
1896 cx->blk_givwhen.defsv_save = NULL;
1897 SvREFCNT_dec(sv);
1898}
1899
ec2c235b
KW
1900/* ------------------ util.h ------------------------------------------- */
1901
1902/*
1903=head1 Miscellaneous Functions
1904
1905=for apidoc foldEQ
1906
1907Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1908same
1909case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes
1910match themselves and their opposite case counterparts. Non-cased and non-ASCII
1911range bytes match only themselves.
1912
1913=cut
1914*/
1915
1916PERL_STATIC_INLINE I32
1917Perl_foldEQ(const char *s1, const char *s2, I32 len)
1918{
1919 const U8 *a = (const U8 *)s1;
1920 const U8 *b = (const U8 *)s2;
1921
1922 PERL_ARGS_ASSERT_FOLDEQ;
1923
1924 assert(len >= 0);
1925
1926 while (len--) {
1927 if (*a != *b && *a != PL_fold[*b])
1928 return 0;
1929 a++,b++;
1930 }
1931 return 1;
1932}
1933
0f9cb40c 1934PERL_STATIC_INLINE I32
ec2c235b
KW
1935Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
1936{
1937 /* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on
1938 * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
1939 * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor
1940 * does it check that the strings each have at least 'len' characters */
1941
1942 const U8 *a = (const U8 *)s1;
1943 const U8 *b = (const U8 *)s2;
1944
1945 PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
1946
1947 assert(len >= 0);
1948
1949 while (len--) {
1950 if (*a != *b && *a != PL_fold_latin1[*b]) {
1951 return 0;
1952 }
1953 a++, b++;
1954 }
1955 return 1;
1956}
1957
1958/*
1959=for apidoc foldEQ_locale
1960
1961Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1962same case-insensitively in the current locale; false otherwise.
1963
1964=cut
1965*/
1966
0f9cb40c 1967PERL_STATIC_INLINE I32
ec2c235b
KW
1968Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
1969{
1970 dVAR;
1971 const U8 *a = (const U8 *)s1;
1972 const U8 *b = (const U8 *)s2;
1973
1974 PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
1975
1976 assert(len >= 0);
1977
1978 while (len--) {
1979 if (*a != *b && *a != PL_fold_locale[*b])
1980 return 0;
1981 a++,b++;
1982 }
1983 return 1;
1984}
1985
6dba01e2
KW
1986#if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT))
1987
1988PERL_STATIC_INLINE void *
1989S_my_memrchr(const char * s, const char c, const STRLEN len)
1990{
1991 /* memrchr(), since many platforms lack it */
1992
1993 const char * t = s + len - 1;
1994
1995 PERL_ARGS_ASSERT_MY_MEMRCHR;
1996
1997 while (t >= s) {
1998 if (*t == c) {
1999 return (void *) t;
2000 }
2001 t--;
2002 }
2003
2004 return NULL;
2005}
2006
2007#endif
2008
ed382232 2009/*
c8028aa6
TC
2010 * ex: set ts=8 sts=4 sw=4 et:
2011 */