This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
bytes_to_utf8(): Remove obsolete comment
[perl5.git] / inline.h
CommitLineData
25468daa
FC
1/* inline.h
2 *
3 * Copyright (C) 2012 by Larry Wall and others
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
7 *
8 * This file is a home for static inline functions that cannot go in other
9 * header files, because they depend on proto.h (included after most other
10 * headers) or struct definitions.
11 *
12 * Each section names the header file that the functions "belong" to.
13 */
27669aa4 14
be3a7a5d
KW
15/* ------------------------------- av.h ------------------------------- */
16
c70927a6 17PERL_STATIC_INLINE SSize_t
be3a7a5d
KW
18S_av_top_index(pTHX_ AV *av)
19{
20 PERL_ARGS_ASSERT_AV_TOP_INDEX;
21 assert(SvTYPE(av) == SVt_PVAV);
22
23 return AvFILL(av);
24}
25
1afe1db1
FC
26/* ------------------------------- cv.h ------------------------------- */
27
ae77754a
FC
28PERL_STATIC_INLINE GV *
29S_CvGV(pTHX_ CV *sv)
30{
31 return CvNAMED(sv)
32 ? Perl_cvgv_from_hek(aTHX_ sv)
33 : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
34}
35
1afe1db1
FC
36PERL_STATIC_INLINE I32 *
37S_CvDEPTHp(const CV * const sv)
38{
39 assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
8de47657 40 return &((XPVCV*)SvANY(sv))->xcv_depth;
1afe1db1
FC
41}
42
d16269d8
PM
43/*
44 CvPROTO returns the prototype as stored, which is not necessarily what
45 the interpreter should be using. Specifically, the interpreter assumes
46 that spaces have been stripped, which has been the case if the prototype
47 was added by toke.c, but is generally not the case if it was added elsewhere.
48 Since we can't enforce the spacelessness at assignment time, this routine
49 provides a temporary copy at parse time with spaces removed.
50 I<orig> is the start of the original buffer, I<len> is the length of the
51 prototype and will be updated when this returns.
52 */
53
5b67adb8 54#ifdef PERL_CORE
d16269d8
PM
55PERL_STATIC_INLINE char *
56S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
57{
58 SV * tmpsv;
59 char * tmps;
60 tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
61 tmps = SvPVX(tmpsv);
62 while ((*len)--) {
63 if (!isSPACE(*orig))
64 *tmps++ = *orig;
65 orig++;
66 }
67 *tmps = '\0';
68 *len = tmps - SvPVX(tmpsv);
69 return SvPVX(tmpsv);
70}
5b67adb8 71#endif
d16269d8 72
25fdce4a
FC
73/* ------------------------------- mg.h ------------------------------- */
74
75#if defined(PERL_CORE) || defined(PERL_EXT)
76/* assumes get-magic and stringification have already occurred */
77PERL_STATIC_INLINE STRLEN
78S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
79{
80 assert(mg->mg_type == PERL_MAGIC_regex_global);
81 assert(mg->mg_len != -1);
82 if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
83 return (STRLEN)mg->mg_len;
84 else {
85 const STRLEN pos = (STRLEN)mg->mg_len;
86 /* Without this check, we may read past the end of the buffer: */
87 if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
88 return sv_or_pv_pos_u2b(sv, s, pos, NULL);
89 }
90}
91#endif
92
03414f05
FC
93/* ------------------------------- pad.h ------------------------------ */
94
95#if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
96PERL_STATIC_INLINE bool
97PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
98{
99 /* is seq within the range _LOW to _HIGH ?
100 * This is complicated by the fact that PL_cop_seqmax
101 * may have wrapped around at some point */
102 if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
103 return FALSE; /* not yet introduced */
104
105 if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
106 /* in compiling scope */
107 if (
108 (seq > COP_SEQ_RANGE_LOW(pn))
109 ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
110 : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
111 )
112 return TRUE;
113 }
114 else if (
115 (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
116 ?
117 ( seq > COP_SEQ_RANGE_LOW(pn)
118 || seq <= COP_SEQ_RANGE_HIGH(pn))
119
120 : ( seq > COP_SEQ_RANGE_LOW(pn)
121 && seq <= COP_SEQ_RANGE_HIGH(pn))
122 )
123 return TRUE;
124 return FALSE;
125}
126#endif
127
33a4312b
FC
128/* ------------------------------- pp.h ------------------------------- */
129
130PERL_STATIC_INLINE I32
131S_TOPMARK(pTHX)
132{
133 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 134 "MARK top %p %" IVdf "\n",
33a4312b
FC
135 PL_markstack_ptr,
136 (IV)*PL_markstack_ptr)));
137 return *PL_markstack_ptr;
138}
139
140PERL_STATIC_INLINE I32
141S_POPMARK(pTHX)
142{
143 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 144 "MARK pop %p %" IVdf "\n",
33a4312b
FC
145 (PL_markstack_ptr-1),
146 (IV)*(PL_markstack_ptr-1))));
147 assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
148 return *PL_markstack_ptr--;
149}
150
8d919b0a
FC
151/* ----------------------------- regexp.h ----------------------------- */
152
153PERL_STATIC_INLINE struct regexp *
154S_ReANY(const REGEXP * const re)
155{
156 assert(isREGEXP(re));
157 return re->sv_u.svu_rx;
158}
159
27669aa4
FC
160/* ------------------------------- sv.h ------------------------------- */
161
162PERL_STATIC_INLINE SV *
163S_SvREFCNT_inc(SV *sv)
164{
2439e033 165 if (LIKELY(sv != NULL))
27669aa4
FC
166 SvREFCNT(sv)++;
167 return sv;
168}
169PERL_STATIC_INLINE SV *
170S_SvREFCNT_inc_NN(SV *sv)
171{
172 SvREFCNT(sv)++;
173 return sv;
174}
175PERL_STATIC_INLINE void
176S_SvREFCNT_inc_void(SV *sv)
177{
2439e033 178 if (LIKELY(sv != NULL))
27669aa4
FC
179 SvREFCNT(sv)++;
180}
75e16a44
FC
181PERL_STATIC_INLINE void
182S_SvREFCNT_dec(pTHX_ SV *sv)
183{
2439e033 184 if (LIKELY(sv != NULL)) {
75a9bf96 185 U32 rc = SvREFCNT(sv);
79e2a32a 186 if (LIKELY(rc > 1))
75a9bf96
DM
187 SvREFCNT(sv) = rc - 1;
188 else
189 Perl_sv_free2(aTHX_ sv, rc);
75e16a44
FC
190 }
191}
541377b1
FC
192
193PERL_STATIC_INLINE void
4a9a56a7
DM
194S_SvREFCNT_dec_NN(pTHX_ SV *sv)
195{
196 U32 rc = SvREFCNT(sv);
79e2a32a 197 if (LIKELY(rc > 1))
4a9a56a7
DM
198 SvREFCNT(sv) = rc - 1;
199 else
200 Perl_sv_free2(aTHX_ sv, rc);
201}
202
203PERL_STATIC_INLINE void
541377b1
FC
204SvAMAGIC_on(SV *sv)
205{
206 assert(SvROK(sv));
207 if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
208}
209PERL_STATIC_INLINE void
210SvAMAGIC_off(SV *sv)
211{
212 if (SvROK(sv) && SvOBJECT(SvRV(sv)))
213 HvAMAGIC_off(SvSTASH(SvRV(sv)));
214}
215
216PERL_STATIC_INLINE U32
541377b1
FC
217S_SvPADSTALE_on(SV *sv)
218{
c0683843 219 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
220 return SvFLAGS(sv) |= SVs_PADSTALE;
221}
222PERL_STATIC_INLINE U32
223S_SvPADSTALE_off(SV *sv)
224{
c0683843 225 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
226 return SvFLAGS(sv) &= ~SVs_PADSTALE;
227}
25fdce4a 228#if defined(PERL_CORE) || defined (PERL_EXT)
4ddea69a 229PERL_STATIC_INLINE STRLEN
6964422a 230S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
4ddea69a 231{
25fdce4a 232 PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
4ddea69a
FC
233 if (SvGAMAGIC(sv)) {
234 U8 *hopped = utf8_hop((U8 *)pv, pos);
235 if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
236 return (STRLEN)(hopped - (U8 *)pv);
237 }
238 return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
239}
240#endif
f019c49e 241
d1decf2b
TC
242/* ------------------------------- handy.h ------------------------------- */
243
244/* saves machine code for a common noreturn idiom typically used in Newx*() */
c1d6452f 245#ifdef GCC_DIAG_PRAGMA
6ab56f1e 246GCC_DIAG_IGNORE(-Wunused-function) /* Intentionally left semicolonless. */
c1d6452f 247#endif
d1decf2b
TC
248static void
249S_croak_memory_wrap(void)
250{
251 Perl_croak_nocontext("%s",PL_memory_wrap);
252}
c1d6452f 253#ifdef GCC_DIAG_PRAGMA
6ab56f1e 254GCC_DIAG_RESTORE /* Intentionally left semicolonless. */
c1d6452f 255#endif
d1decf2b 256
a8a2ceaa
KW
257/* ------------------------------- utf8.h ------------------------------- */
258
2fe720e2
KW
259/*
260=head1 Unicode Support
261*/
262
55d09dc8
KW
263PERL_STATIC_INLINE void
264S_append_utf8_from_native_byte(const U8 byte, U8** dest)
265{
266 /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
267 * encoded string at '*dest', updating '*dest' to include it */
268
55d09dc8
KW
269 PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
270
6f2d5cbc 271 if (NATIVE_BYTE_IS_INVARIANT(byte))
a09ec51a 272 *((*dest)++) = byte;
55d09dc8 273 else {
a09ec51a
KW
274 *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
275 *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
55d09dc8
KW
276 }
277}
278
e123187a 279/*
2fe720e2 280=for apidoc valid_utf8_to_uvchr
2717076a 281Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
2fe720e2
KW
282the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
283it passes C<L</isUTF8_CHAR>>). Surrogates, non-character code points, and
284non-Unicode code points are allowed.
285
286=cut
287
288 */
289
290PERL_STATIC_INLINE UV
291Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
292{
c41b2540 293 const UV expectlen = UTF8SKIP(s);
2fe720e2
KW
294 const U8* send = s + expectlen;
295 UV uv = *s;
296
297 PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
298
299 if (retlen) {
300 *retlen = expectlen;
301 }
302
303 /* An invariant is trivially returned */
304 if (expectlen == 1) {
305 return uv;
306 }
307
308 /* Remove the leading bits that indicate the number of bytes, leaving just
309 * the bits that are part of the value */
310 uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
311
312 /* Now, loop through the remaining bytes, accumulating each into the
313 * working total as we go. (I khw tried unrolling the loop for up to 4
314 * bytes, but there was no performance improvement) */
315 for (++s; s < send; s++) {
316 uv = UTF8_ACCUMULATE(uv, *s);
317 }
318
319 return UNI_TO_NATIVE(uv);
320
321}
322
1e599354
KW
323/*
324=for apidoc is_utf8_invariant_string
325
82c5d941 326Returns TRUE if the first C<len> bytes of the string C<s> are the same
1e599354 327regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
82c5d941
KW
328EBCDIC machines); otherwise it returns FALSE. That is, it returns TRUE if they
329are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and only
330the ASCII characters fit this definition. On EBCDIC machines, the ASCII-range
331characters are invariant, but so also are the C1 controls.
1e599354
KW
332
333If C<len> is 0, it will be calculated using C<strlen(s)> (which means if you
334use this option, that C<s> can't have embedded C<NUL> characters and has to
335have a terminating C<NUL> byte).
336
9f2abfde
KW
337See also
338C<L</is_utf8_string>>,
339C<L</is_utf8_string_flags>>,
340C<L</is_utf8_string_loc>>,
341C<L</is_utf8_string_loc_flags>>,
342C<L</is_utf8_string_loclen>>,
343C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
344C<L</is_utf8_fixed_width_buf_flags>>,
345C<L</is_utf8_fixed_width_buf_loc_flags>>,
346C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
347C<L</is_strict_utf8_string>>,
348C<L</is_strict_utf8_string_loc>>,
349C<L</is_strict_utf8_string_loclen>>,
350C<L</is_c9strict_utf8_string>>,
351C<L</is_c9strict_utf8_string_loc>>,
352and
353C<L</is_c9strict_utf8_string_loclen>>.
1e599354
KW
354
355=cut
356*/
357
358PERL_STATIC_INLINE bool
359S_is_utf8_invariant_string(const U8* const s, const STRLEN len)
360{
361 const U8* const send = s + (len ? len : strlen((const char *)s));
362 const U8* x = s;
363
364 PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING;
365
366 for (; x < send; ++x) {
367 if (!UTF8_IS_INVARIANT(*x))
368 return FALSE;
369 }
370
371 return TRUE;
372}
373
7c93d8f0 374/*
5ff889fb
KW
375=for apidoc is_utf8_string
376
82c5d941
KW
377Returns TRUE if the first C<len> bytes of string C<s> form a valid
378Perl-extended-UTF-8 string; returns FALSE otherwise. If C<len> is 0, it will
379be calculated using C<strlen(s)> (which means if you use this option, that C<s>
380can't have embedded C<NUL> characters and has to have a terminating C<NUL>
381byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
382
2717076a
KW
383This function considers Perl's extended UTF-8 to be valid. That means that
384code points above Unicode, surrogates, and non-character code points are
9f2abfde
KW
385considered valid by this function. Use C<L</is_strict_utf8_string>>,
386C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
387code points are considered valid.
5ff889fb 388
9f2abfde
KW
389See also
390C<L</is_utf8_invariant_string>>,
391C<L</is_utf8_string_loc>>,
392C<L</is_utf8_string_loclen>>,
8bc127bf
KW
393C<L</is_utf8_fixed_width_buf_flags>>,
394C<L</is_utf8_fixed_width_buf_loc_flags>>,
395and C<L</is_utf8_fixed_width_buf_loclen_flags>>.
5ff889fb
KW
396
397=cut
398*/
399
56e4cf64 400PERL_STATIC_INLINE bool
c41b2540 401Perl_is_utf8_string(const U8 *s, const STRLEN len)
5ff889fb 402{
35936d22
KW
403 /* This is now marked pure in embed.fnc, because isUTF8_CHAR now is pure.
404 * Be aware of possible changes to that */
405
5ff889fb
KW
406 const U8* const send = s + (len ? len : strlen((const char *)s));
407 const U8* x = s;
408
409 PERL_ARGS_ASSERT_IS_UTF8_STRING;
410
411 while (x < send) {
c41b2540
KW
412 const STRLEN cur_len = isUTF8_CHAR(x, send);
413 if (UNLIKELY(! cur_len)) {
5ff889fb
KW
414 return FALSE;
415 }
c41b2540 416 x += cur_len;
5ff889fb
KW
417 }
418
419 return TRUE;
420}
421
422/*
9f2abfde
KW
423=for apidoc is_strict_utf8_string
424
425Returns TRUE if the first C<len> bytes of string C<s> form a valid
426UTF-8-encoded string that is fully interchangeable by any application using
427Unicode rules; otherwise it returns FALSE. If C<len> is 0, it will be
428calculated using C<strlen(s)> (which means if you use this option, that C<s>
429can't have embedded C<NUL> characters and has to have a terminating C<NUL>
430byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
431
432This function returns FALSE for strings containing any
433code points above the Unicode max of 0x10FFFF, surrogate code points, or
434non-character code points.
435
436See also
437C<L</is_utf8_invariant_string>>,
438C<L</is_utf8_string>>,
439C<L</is_utf8_string_flags>>,
440C<L</is_utf8_string_loc>>,
441C<L</is_utf8_string_loc_flags>>,
442C<L</is_utf8_string_loclen>>,
443C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
444C<L</is_utf8_fixed_width_buf_flags>>,
445C<L</is_utf8_fixed_width_buf_loc_flags>>,
446C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
447C<L</is_strict_utf8_string_loc>>,
448C<L</is_strict_utf8_string_loclen>>,
449C<L</is_c9strict_utf8_string>>,
450C<L</is_c9strict_utf8_string_loc>>,
451and
452C<L</is_c9strict_utf8_string_loclen>>.
453
454=cut
455*/
456
457PERL_STATIC_INLINE bool
458S_is_strict_utf8_string(const U8 *s, const STRLEN len)
459{
460 const U8* const send = s + (len ? len : strlen((const char *)s));
461 const U8* x = s;
462
463 PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING;
464
465 while (x < send) {
466 const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
467 if (UNLIKELY(! cur_len)) {
468 return FALSE;
469 }
470 x += cur_len;
471 }
472
473 return TRUE;
474}
475
476/*
477=for apidoc is_c9strict_utf8_string
478
479Returns TRUE if the first C<len> bytes of string C<s> form a valid
480UTF-8-encoded string that conforms to
481L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
482otherwise it returns FALSE. If C<len> is 0, it will be calculated using
483C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
484C<NUL> characters and has to have a terminating C<NUL> byte). Note that all
485characters being ASCII constitute 'a valid UTF-8 string'.
486
487This function returns FALSE for strings containing any code points above the
488Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
489code points per
490L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
491
492See also
493C<L</is_utf8_invariant_string>>,
494C<L</is_utf8_string>>,
495C<L</is_utf8_string_flags>>,
496C<L</is_utf8_string_loc>>,
497C<L</is_utf8_string_loc_flags>>,
498C<L</is_utf8_string_loclen>>,
499C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
500C<L</is_utf8_fixed_width_buf_flags>>,
501C<L</is_utf8_fixed_width_buf_loc_flags>>,
502C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
503C<L</is_strict_utf8_string>>,
504C<L</is_strict_utf8_string_loc>>,
505C<L</is_strict_utf8_string_loclen>>,
506C<L</is_c9strict_utf8_string_loc>>,
507and
508C<L</is_c9strict_utf8_string_loclen>>.
509
510=cut
511*/
512
513PERL_STATIC_INLINE bool
514S_is_c9strict_utf8_string(const U8 *s, const STRLEN len)
515{
516 const U8* const send = s + (len ? len : strlen((const char *)s));
517 const U8* x = s;
518
519 PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING;
520
521 while (x < send) {
522 const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
523 if (UNLIKELY(! cur_len)) {
524 return FALSE;
525 }
526 x += cur_len;
527 }
528
529 return TRUE;
530}
531
532/* The above 3 functions could have been moved into the more general one just
533 * below, and made #defines that call it with the right 'flags'. They are
534 * currently kept separate to increase their chances of getting inlined */
535
536/*
537=for apidoc is_utf8_string_flags
538
539Returns TRUE if the first C<len> bytes of string C<s> form a valid
540UTF-8 string, subject to the restrictions imposed by C<flags>;
541returns FALSE otherwise. If C<len> is 0, it will be calculated
542using C<strlen(s)> (which means if you use this option, that C<s> can't have
543embedded C<NUL> characters and has to have a terminating C<NUL> byte). Note
544that all characters being ASCII constitute 'a valid UTF-8 string'.
545
546If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
547C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
548as C<L</is_strict_utf8_string>>; and if C<flags> is
549C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
550C<L</is_c9strict_utf8_string>>. Otherwise C<flags> may be any
551combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
552C<L</utf8n_to_uvchr>>, with the same meanings.
553
554See also
555C<L</is_utf8_invariant_string>>,
556C<L</is_utf8_string>>,
557C<L</is_utf8_string_loc>>,
558C<L</is_utf8_string_loc_flags>>,
559C<L</is_utf8_string_loclen>>,
560C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
561C<L</is_utf8_fixed_width_buf_flags>>,
562C<L</is_utf8_fixed_width_buf_loc_flags>>,
563C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
564C<L</is_strict_utf8_string>>,
565C<L</is_strict_utf8_string_loc>>,
566C<L</is_strict_utf8_string_loclen>>,
567C<L</is_c9strict_utf8_string>>,
568C<L</is_c9strict_utf8_string_loc>>,
569and
570C<L</is_c9strict_utf8_string_loclen>>.
571
572=cut
573*/
574
575PERL_STATIC_INLINE bool
576S_is_utf8_string_flags(const U8 *s, const STRLEN len, const U32 flags)
577{
578 const U8* const send = s + (len ? len : strlen((const char *)s));
579 const U8* x = s;
580
581 PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
582 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
583 |UTF8_DISALLOW_ABOVE_31_BIT)));
584
585 if (flags == 0) {
586 return is_utf8_string(s, len);
587 }
588
589 if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
590 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
591 {
592 return is_strict_utf8_string(s, len);
593 }
594
595 if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
596 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
597 {
598 return is_c9strict_utf8_string(s, len);
599 }
600
601 while (x < send) {
602 STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
603 if (UNLIKELY(! cur_len)) {
604 return FALSE;
605 }
606 x += cur_len;
607 }
608
609 return TRUE;
610}
611
612/*
5ff889fb
KW
613
614=for apidoc is_utf8_string_loc
615
2717076a 616Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 617case of "utf8ness failure") or the location C<s>+C<len> (in the case of
82c5d941 618"utf8ness success") in the C<ep> pointer.
5ff889fb 619
2717076a 620See also C<L</is_utf8_string_loclen>>.
5ff889fb 621
3964c812
KW
622=cut
623*/
624
/* Calls is_utf8_string_loclen() with 0 as its final ('el') argument */
#define is_utf8_string_loc(s, len, ep)                                      \
                                    is_utf8_string_loclen(s, len, ep, 0)
626
627/*
628
5ff889fb
KW
629=for apidoc is_utf8_string_loclen
630
2717076a 631Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 632case of "utf8ness failure") or the location C<s>+C<len> (in the case of
9f2abfde 633"utf8ness success") in the C<ep> pointer, and the number of UTF-8
82c5d941 634encoded characters in the C<el> pointer.
5ff889fb 635
2717076a 636See also C<L</is_utf8_string_loc>>.
5ff889fb
KW
637
638=cut
639*/
640
56e4cf64 641PERL_STATIC_INLINE bool
c41b2540 642Perl_is_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
5ff889fb
KW
643{
644 const U8* const send = s + (len ? len : strlen((const char *)s));
645 const U8* x = s;
646 STRLEN outlen = 0;
647
648 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
649
650 while (x < send) {
c41b2540
KW
651 const STRLEN cur_len = isUTF8_CHAR(x, send);
652 if (UNLIKELY(! cur_len)) {
5ff889fb
KW
653 break;
654 }
c41b2540 655 x += cur_len;
5ff889fb
KW
656 outlen++;
657 }
658
659 if (el)
660 *el = outlen;
661
662 if (ep) {
663 *ep = x;
664 }
665
666 return (x == send);
667}
668
669/*
9f2abfde
KW
670
671=for apidoc is_strict_utf8_string_loc
672
673Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
674case of "utf8ness failure") or the location C<s>+C<len> (in the case of
675"utf8ness success") in the C<ep> pointer.
676
677See also C<L</is_strict_utf8_string_loclen>>.
678
679=cut
680*/
681
/* Calls is_strict_utf8_string_loclen() with 0 as its final ('el') argument */
#define is_strict_utf8_string_loc(s, len, ep)                               \
                            is_strict_utf8_string_loclen(s, len, ep, 0)
684
685/*
686
687=for apidoc is_strict_utf8_string_loclen
688
689Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
690case of "utf8ness failure") or the location C<s>+C<len> (in the case of
691"utf8ness success") in the C<ep> pointer, and the number of UTF-8
692encoded characters in the C<el> pointer.
693
694See also C<L</is_strict_utf8_string_loc>>.
695
696=cut
697*/
698
699PERL_STATIC_INLINE bool
700S_is_strict_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
701{
702 const U8* const send = s + (len ? len : strlen((const char *)s));
703 const U8* x = s;
704 STRLEN outlen = 0;
705
706 PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
707
708 while (x < send) {
709 const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
710 if (UNLIKELY(! cur_len)) {
711 break;
712 }
713 x += cur_len;
714 outlen++;
715 }
716
717 if (el)
718 *el = outlen;
719
720 if (ep) {
721 *ep = x;
722 }
723
724 return (x == send);
725}
726
727/*
728
729=for apidoc is_c9strict_utf8_string_loc
730
731Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
732the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
733"utf8ness success") in the C<ep> pointer.
734
735See also C<L</is_c9strict_utf8_string_loclen>>.
736
737=cut
738*/
739
/* Calls is_c9strict_utf8_string_loclen() with 0 as its final ('el')
 * argument */
#define is_c9strict_utf8_string_loc(s, len, ep)                             \
                            is_c9strict_utf8_string_loclen(s, len, ep, 0)
742
743/*
744
745=for apidoc is_c9strict_utf8_string_loclen
746
747Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
748the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
749"utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
750characters in the C<el> pointer.
751
752See also C<L</is_c9strict_utf8_string_loc>>.
753
754=cut
755*/
756
757PERL_STATIC_INLINE bool
758S_is_c9strict_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
759{
760 const U8* const send = s + (len ? len : strlen((const char *)s));
761 const U8* x = s;
762 STRLEN outlen = 0;
763
764 PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
765
766 while (x < send) {
767 const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
768 if (UNLIKELY(! cur_len)) {
769 break;
770 }
771 x += cur_len;
772 outlen++;
773 }
774
775 if (el)
776 *el = outlen;
777
778 if (ep) {
779 *ep = x;
780 }
781
782 return (x == send);
783}
784
785/*
786
787=for apidoc is_utf8_string_loc_flags
788
789Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
790case of "utf8ness failure") or the location C<s>+C<len> (in the case of
791"utf8ness success") in the C<ep> pointer.
792
793See also C<L</is_utf8_string_loclen_flags>>.
794
795=cut
796*/
797
/* Calls is_utf8_string_loclen_flags() with 0 as its 'el' argument */
#define is_utf8_string_loc_flags(s, len, ep, flags)                         \
                        is_utf8_string_loclen_flags(s, len, ep, 0, flags)
800
801
802/* The above 3 actual functions could have been moved into the more general one
803 * just below, and made #defines that call it with the right 'flags'. They are
804 * currently kept separate to increase their chances of getting inlined */
805
806/*
807
808=for apidoc is_utf8_string_loclen_flags
809
810Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
811case of "utf8ness failure") or the location C<s>+C<len> (in the case of
812"utf8ness success") in the C<ep> pointer, and the number of UTF-8
813encoded characters in the C<el> pointer.
814
815See also C<L</is_utf8_string_loc_flags>>.
816
817=cut
818*/
819
820PERL_STATIC_INLINE bool
821S_is_utf8_string_loclen_flags(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
822{
823 const U8* const send = s + (len ? len : strlen((const char *)s));
824 const U8* x = s;
825 STRLEN outlen = 0;
826
827 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
828 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
829 |UTF8_DISALLOW_ABOVE_31_BIT)));
830
831 if (flags == 0) {
832 return is_utf8_string_loclen(s, len, ep, el);
833 }
834
835 if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
836 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
837 {
838 return is_strict_utf8_string_loclen(s, len, ep, el);
839 }
840
841 if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
842 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
843 {
844 return is_c9strict_utf8_string_loclen(s, len, ep, el);
845 }
846
847 while (x < send) {
848 const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
849 if (UNLIKELY(! cur_len)) {
850 break;
851 }
852 x += cur_len;
853 outlen++;
854 }
855
856 if (el)
857 *el = outlen;
858
859 if (ep) {
860 *ep = x;
861 }
862
863 return (x == send);
864}
865
866/*
7c93d8f0
KW
867=for apidoc utf8_distance
868
869Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
870and C<b>.
871
872WARNING: use only if you *know* that the pointers point inside the
873same UTF-8 buffer.
874
875=cut
876*/
877
878PERL_STATIC_INLINE IV
879Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
880{
881 PERL_ARGS_ASSERT_UTF8_DISTANCE;
882
883 return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
884}
885
886/*
887=for apidoc utf8_hop
888
889Return the UTF-8 pointer C<s> displaced by C<off> characters, either
890forward or backward.
891
892WARNING: do not use the following unless you *know* C<off> is within
893the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
894on the first byte of a character or just after the last byte of a character.
895
896=cut
897*/
898
899PERL_STATIC_INLINE U8 *
900Perl_utf8_hop(const U8 *s, SSize_t off)
901{
902 PERL_ARGS_ASSERT_UTF8_HOP;
903
904 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
905 * the bitops (especially ~) can create illegal UTF-8.
906 * In other words: in Perl UTF-8 is not just for Unicode. */
907
908 if (off >= 0) {
909 while (off--)
910 s += UTF8SKIP(s);
911 }
912 else {
913 while (off++) {
914 s--;
915 while (UTF8_IS_CONTINUATION(*s))
916 s--;
917 }
918 }
de979548 919 GCC_DIAG_IGNORE(-Wcast-qual);
7c93d8f0 920 return (U8 *)s;
de979548 921 GCC_DIAG_RESTORE;
7c93d8f0
KW
922}
923
4dab108f 924/*
65df57a8
TC
925=for apidoc utf8_hop_forward
926
927Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
928forward.
929
930C<off> must be non-negative.
931
932C<s> must be before or equal to C<end>.
933
934When moving forward it will not move beyond C<end>.
935
936Will not exceed this limit even if the string is not valid "UTF-8".
937
938=cut
939*/
940
941PERL_STATIC_INLINE U8 *
942Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
943{
944 PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
945
946 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
947 * the bitops (especially ~) can create illegal UTF-8.
948 * In other words: in Perl UTF-8 is not just for Unicode. */
949
950 assert(s <= end);
951 assert(off >= 0);
952
953 while (off--) {
954 STRLEN skip = UTF8SKIP(s);
de979548
P
955 if ((STRLEN)(end - s) <= skip) {
956 GCC_DIAG_IGNORE(-Wcast-qual);
65df57a8 957 return (U8 *)end;
de979548
P
958 GCC_DIAG_RESTORE;
959 }
65df57a8
TC
960 s += skip;
961 }
962
de979548 963 GCC_DIAG_IGNORE(-Wcast-qual);
65df57a8 964 return (U8 *)s;
de979548 965 GCC_DIAG_RESTORE;
65df57a8
TC
966}
967
968/*
969=for apidoc utf8_hop_back
970
971Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
972backward.
973
974C<off> must be non-positive.
975
976C<s> must be after or equal to C<start>.
977
978When moving backward it will not move before C<start>.
979
980Will not exceed this limit even if the string is not valid "UTF-8".
981
982=cut
983*/
984
985PERL_STATIC_INLINE U8 *
986Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
987{
988 PERL_ARGS_ASSERT_UTF8_HOP_BACK;
989
990 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
991 * the bitops (especially ~) can create illegal UTF-8.
992 * In other words: in Perl UTF-8 is not just for Unicode. */
993
994 assert(start <= s);
995 assert(off <= 0);
996
997 while (off++ && s > start) {
998 s--;
999 while (UTF8_IS_CONTINUATION(*s) && s > start)
1000 s--;
1001 }
1002
de979548 1003 GCC_DIAG_IGNORE(-Wcast-qual);
65df57a8 1004 return (U8 *)s;
de979548 1005 GCC_DIAG_RESTORE;
65df57a8
TC
1006}
1007
1008/*
1009=for apidoc utf8_hop_safe
1010
1011Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1012either forward or backward.
1013
1014When moving backward it will not move before C<start>.
1015
1016When moving forward it will not move beyond C<end>.
1017
1018Will not exceed those limits even if the string is not valid "UTF-8".
1019
1020=cut
1021*/
1022
1023PERL_STATIC_INLINE U8 *
1024Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
1025{
1026 PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
1027
1028 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1029 * the bitops (especially ~) can create illegal UTF-8.
1030 * In other words: in Perl UTF-8 is not just for Unicode. */
1031
1032 assert(start <= s && s <= end);
1033
1034 if (off >= 0) {
1035 return utf8_hop_forward(s, off, end);
1036 }
1037 else {
1038 return utf8_hop_back(s, off, start);
1039 }
1040}
1041
1042/*
4dab108f
KW
1043
1044=for apidoc is_utf8_valid_partial_char
1045
6cbb9248
KW
1046Returns 0 if the sequence of bytes starting at C<s> and looking no further than
1047S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
1048points. Otherwise, it returns 1 if there exists at least one non-empty
1049sequence of bytes that when appended to sequence C<s>, starting at position
1050C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
1051otherwise returns 0.
1052
1053In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
1054point.
1055
1056This is useful when a fixed-length buffer is being tested for being well-formed
1057UTF-8, but the final few bytes in it don't comprise a full character; that is,
1058it is split somewhere in the middle of the final code point's UTF-8
1059representation. (Presumably when the buffer is refreshed with the next chunk
1060of data, the new first bytes will complete the partial code point.) This
1061function is used to verify that the final bytes in the current buffer are in
1062fact the legal beginning of some code point, so that if they aren't, the
1063failure can be signalled without having to wait for the next read.
4dab108f
KW
1064
1065=cut
1066*/
2717076a
KW
1067#define is_utf8_valid_partial_char(s, e) \
1068 is_utf8_valid_partial_char_flags(s, e, 0)
f1c999a7
KW
1069
1070/*
1071
1072=for apidoc is_utf8_valid_partial_char_flags
1073
1074Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
1075or not the input is a valid UTF-8 encoded partial character, but it takes an
1076extra parameter, C<flags>, which can further restrict which code points are
1077considered valid.
1078
1079If C<flags> is 0, this behaves identically to
1080C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
1081of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
1082there is any sequence of bytes that can complete the input partial character in
1083such a way that a non-prohibited character is formed, the function returns
2717076a
KW
1084TRUE; otherwise FALSE. Non character code points cannot be determined based on
1085partial character input. But many of the other possible excluded types can be
f1c999a7
KW
1086determined from just the first one or two bytes.
1087
1088=cut
1089 */
1090
56e4cf64 1091PERL_STATIC_INLINE bool
f1c999a7 1092S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
4dab108f 1093{
f1c999a7 1094 PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
4dab108f 1095
f1c999a7
KW
1096 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
1097 |UTF8_DISALLOW_ABOVE_31_BIT)));
4dab108f 1098
8875bd48 1099 if (s >= e || s + UTF8SKIP(s) <= e) {
4dab108f
KW
1100 return FALSE;
1101 }
1102
f1c999a7 1103 return cBOOL(_is_utf8_char_helper(s, e, flags));
4dab108f
KW
1104}
1105
8bc127bf
KW
1106/*
1107
1108=for apidoc is_utf8_fixed_width_buf_flags
1109
1110Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
1111is entirely valid UTF-8, subject to the restrictions given by C<flags>;
1112otherwise it returns FALSE.
1113
1114If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
1115without restriction. If the final few bytes of the buffer do not form a
1116complete code point, this will return TRUE anyway, provided that
1117C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
1118
1119If C<flags> is non-zero, it can be any combination of the
1120C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
1121same meanings.
1122
1123This function differs from C<L</is_utf8_string_flags>> only in that the latter
1124returns FALSE if the final few bytes of the string don't form a complete code
1125point.
1126
1127=cut
1128 */
1129#define is_utf8_fixed_width_buf_flags(s, len, flags) \
1130 is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
1131
1132/*
1133
1134=for apidoc is_utf8_fixed_width_buf_loc_flags
1135
1136Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
1137failure in the C<ep> pointer. If the function returns TRUE, C<*ep> will point
1138to the beginning of any partial character at the end of the buffer; if there is
1139no partial character C<*ep> will contain C<s>+C<len>.
1140
1141See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
1142
1143=cut
1144*/
1145
1146#define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \
1147 is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
1148
1149/*
1150
1151=for apidoc is_utf8_fixed_width_buf_loclen_flags
1152
1153Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
1154complete, valid characters found in the C<el> pointer.
1155
1156=cut
1157*/
1158
1159PERL_STATIC_INLINE bool
1160S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
1161 const STRLEN len,
1162 const U8 **ep,
1163 STRLEN *el,
1164 const U32 flags)
1165{
1166 const U8 * maybe_partial;
1167
1168 PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
1169
1170 if (! ep) {
1171 ep = &maybe_partial;
1172 }
1173
1174 /* If it's entirely valid, return that; otherwise see if the only error is
1175 * that the final few bytes are for a partial character */
1176 return is_utf8_string_loclen_flags(s, len, ep, el, flags)
1177 || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
1178}
1179
c8028aa6
TC
1180/* ------------------------------- perl.h ----------------------------- */
1181
1182/*
dcccc8ff
KW
1183=head1 Miscellaneous Functions
1184
41188aa0 1185=for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
c8028aa6 1186
6602b933 1187Test that the given C<pv> doesn't contain any internal C<NUL> characters.
796b6530 1188If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
c8028aa6
TC
1189
1190Return TRUE if the name is safe.
1191
796b6530 1192Used by the C<IS_SAFE_SYSCALL()> macro.
c8028aa6
TC
1193
1194=cut
1195*/
1196
1197PERL_STATIC_INLINE bool
41188aa0 1198S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
c8028aa6
TC
1199 /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
1200 * perl itself uses xce*() functions which accept 8-bit strings.
1201 */
1202
1203 PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
1204
6c4650b3 1205 if (len > 1) {
c8028aa6 1206 char *null_at;
41188aa0 1207 if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
c8028aa6 1208 SETERRNO(ENOENT, LIB_INVARG);
1d505182 1209 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
c8028aa6 1210 "Invalid \\0 character in %s for %s: %s\\0%s",
41188aa0 1211 what, op_name, pv, null_at+1);
c8028aa6
TC
1212 return FALSE;
1213 }
1214 }
1215
1216 return TRUE;
1217}
1218
1219/*
7cb3f959
TC
1220
1221Return true if the supplied filename has a newline character
fa6c7d00 1222immediately before the first (hopefully only) NUL.
7cb3f959
TC
1223
1224My original look at this incorrectly used the len from SvPV(), but
1225that's incorrect, since we allow for a NUL in pv[len-1].
1226
1227So instead, strlen() and work from there.
1228
1229This allows for the user reading a filename, forgetting to chomp it,
1230then calling:
1231
1232 open my $foo, "$file\0";
1233
1234*/
1235
1236#ifdef PERL_CORE
1237
1238PERL_STATIC_INLINE bool
1239S_should_warn_nl(const char *pv) {
1240 STRLEN len;
1241
1242 PERL_ARGS_ASSERT_SHOULD_WARN_NL;
1243
1244 len = strlen(pv);
1245
1246 return len > 0 && pv[len-1] == '\n';
1247}
1248
1249#endif
1250
81d52ecd
JH
1251/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
1252
1253#define MAX_CHARSET_NAME_LENGTH 2
1254
1255PERL_STATIC_INLINE const char *
1256get_regex_charset_name(const U32 flags, STRLEN* const lenp)
1257{
1258 /* Returns a string that corresponds to the name of the regex character set
1259 * given by 'flags', and *lenp is set the length of that string, which
1260 * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
1261
1262 *lenp = 1;
1263 switch (get_regex_charset(flags)) {
1264 case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
1265 case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
1266 case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
1267 case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
1268 case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
1269 *lenp = 2;
1270 return ASCII_MORE_RESTRICT_PAT_MODS;
1271 }
1272 /* The NOT_REACHED; hides an assert() which has a rather complex
1273 * definition in perl.h. */
1274 NOT_REACHED; /* NOTREACHED */
1275 return "?"; /* Unknown */
1276}
1277
7cb3f959 1278/*
ed382232
TC
1279
1280Return false if any get magic is on the SV other than taint magic.
1281
1282*/
1283
1284PERL_STATIC_INLINE bool
1285S_sv_only_taint_gmagic(SV *sv) {
1286 MAGIC *mg = SvMAGIC(sv);
1287
1288 PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
1289
1290 while (mg) {
1291 if (mg->mg_type != PERL_MAGIC_taint
1292 && !(mg->mg_flags & MGf_GSKIP)
1293 && mg->mg_virtual->svt_get) {
1294 return FALSE;
1295 }
1296 mg = mg->mg_moremagic;
1297 }
1298
1299 return TRUE;
1300}
1301
ed8ff0f3
DM
1302/* ------------------ cop.h ------------------------------------------- */
1303
1304
1305/* Enter a block. Push a new base context and return its address. */
1306
1307PERL_STATIC_INLINE PERL_CONTEXT *
1308S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
1309{
1310 PERL_CONTEXT * cx;
1311
1312 PERL_ARGS_ASSERT_CX_PUSHBLOCK;
1313
1314 CXINC;
1315 cx = CX_CUR();
1316 cx->cx_type = type;
1317 cx->blk_gimme = gimme;
1318 cx->blk_oldsaveix = saveix;
4caf7d8c 1319 cx->blk_oldsp = (I32)(sp - PL_stack_base);
ed8ff0f3 1320 cx->blk_oldcop = PL_curcop;
4caf7d8c 1321 cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
ed8ff0f3
DM
1322 cx->blk_oldscopesp = PL_scopestack_ix;
1323 cx->blk_oldpm = PL_curpm;
ce8bb8d8 1324 cx->blk_old_tmpsfloor = PL_tmps_floor;
ed8ff0f3
DM
1325
1326 PL_tmps_floor = PL_tmps_ix;
1327 CX_DEBUG(cx, "PUSH");
1328 return cx;
1329}
1330
1331
1332/* Exit a block (RETURN and LAST). */
1333
1334PERL_STATIC_INLINE void
1335S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
1336{
1337 PERL_ARGS_ASSERT_CX_POPBLOCK;
1338
1339 CX_DEBUG(cx, "POP");
1340 /* these 3 are common to cx_popblock and cx_topblock */
1341 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1342 PL_scopestack_ix = cx->blk_oldscopesp;
1343 PL_curpm = cx->blk_oldpm;
1344
1345 /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
1346 * and leaves a CX entry lying around for repeated use, so
1347 * skip for multicall */ \
1348 assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
1349 || PL_savestack_ix == cx->blk_oldsaveix);
1350 PL_curcop = cx->blk_oldcop;
ce8bb8d8 1351 PL_tmps_floor = cx->blk_old_tmpsfloor;
ed8ff0f3
DM
1352}
1353
1354/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
1355 * Whereas cx_popblock() restores the state to the point just before
1356 * cx_pushblock() was called, cx_topblock() restores it to the point just
1357 * *after* cx_pushblock() was called. */
1358
1359PERL_STATIC_INLINE void
1360S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
1361{
1362 PERL_ARGS_ASSERT_CX_TOPBLOCK;
1363
1364 CX_DEBUG(cx, "TOP");
1365 /* these 3 are common to cx_popblock and cx_topblock */
1366 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1367 PL_scopestack_ix = cx->blk_oldscopesp;
1368 PL_curpm = cx->blk_oldpm;
1369
1370 PL_stack_sp = PL_stack_base + cx->blk_oldsp;
1371}
1372
1373
a73d8813
DM
1374PERL_STATIC_INLINE void
1375S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
1376{
1377 U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
1378
1379 PERL_ARGS_ASSERT_CX_PUSHSUB;
1380
3f6bd23a 1381 PERL_DTRACE_PROBE_ENTRY(cv);
a73d8813
DM
1382 cx->blk_sub.cv = cv;
1383 cx->blk_sub.olddepth = CvDEPTH(cv);
1384 cx->blk_sub.prevcomppad = PL_comppad;
1385 cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
1386 cx->blk_sub.retop = retop;
1387 SvREFCNT_inc_simple_void_NN(cv);
1388 cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
1389}
1390
1391
1392/* subsets of cx_popsub() */
1393
1394PERL_STATIC_INLINE void
1395S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
1396{
1397 CV *cv;
1398
1399 PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
1400 assert(CxTYPE(cx) == CXt_SUB);
1401
1402 PL_comppad = cx->blk_sub.prevcomppad;
1403 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1404 cv = cx->blk_sub.cv;
1405 CvDEPTH(cv) = cx->blk_sub.olddepth;
1406 cx->blk_sub.cv = NULL;
1407 SvREFCNT_dec(cv);
1408}
1409
1410
1411/* handle the @_ part of leaving a sub */
1412
1413PERL_STATIC_INLINE void
1414S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
1415{
1416 AV *av;
1417
1418 PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
1419 assert(CxTYPE(cx) == CXt_SUB);
1420 assert(AvARRAY(MUTABLE_AV(
1421 PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
1422 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
1423
1424 CX_POP_SAVEARRAY(cx);
1425 av = MUTABLE_AV(PAD_SVl(0));
1426 if (UNLIKELY(AvREAL(av)))
1427 /* abandon @_ if it got reified */
1428 clear_defarray(av, 0);
1429 else {
1430 CLEAR_ARGARRAY(av);
1431 }
1432}
1433
1434
1435PERL_STATIC_INLINE void
1436S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
1437{
1438 PERL_ARGS_ASSERT_CX_POPSUB;
1439 assert(CxTYPE(cx) == CXt_SUB);
1440
3f6bd23a 1441 PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
a73d8813
DM
1442
1443 if (CxHASARGS(cx))
1444 cx_popsub_args(cx);
1445 cx_popsub_common(cx);
1446}
1447
1448
6a7d52cc
DM
1449PERL_STATIC_INLINE void
1450S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
1451{
1452 PERL_ARGS_ASSERT_CX_PUSHFORMAT;
1453
1454 cx->blk_format.cv = cv;
1455 cx->blk_format.retop = retop;
1456 cx->blk_format.gv = gv;
1457 cx->blk_format.dfoutgv = PL_defoutgv;
1458 cx->blk_format.prevcomppad = PL_comppad;
1459 cx->blk_u16 = 0;
1460
1461 SvREFCNT_inc_simple_void_NN(cv);
1462 CvDEPTH(cv)++;
1463 SvREFCNT_inc_void(cx->blk_format.dfoutgv);
1464}
1465
1466
1467PERL_STATIC_INLINE void
1468S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
1469{
1470 CV *cv;
1471 GV *dfout;
1472
1473 PERL_ARGS_ASSERT_CX_POPFORMAT;
1474 assert(CxTYPE(cx) == CXt_FORMAT);
1475
1476 dfout = cx->blk_format.dfoutgv;
1477 setdefout(dfout);
1478 cx->blk_format.dfoutgv = NULL;
1479 SvREFCNT_dec_NN(dfout);
1480
1481 PL_comppad = cx->blk_format.prevcomppad;
1482 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1483 cv = cx->blk_format.cv;
1484 cx->blk_format.cv = NULL;
1485 --CvDEPTH(cv);
1486 SvREFCNT_dec_NN(cv);
1487}
1488
1489
13febba5
DM
1490PERL_STATIC_INLINE void
1491S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
1492{
1493 PERL_ARGS_ASSERT_CX_PUSHEVAL;
1494
1495 cx->blk_eval.retop = retop;
1496 cx->blk_eval.old_namesv = namesv;
1497 cx->blk_eval.old_eval_root = PL_eval_root;
1498 cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
1499 cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
1500 cx->blk_eval.cur_top_env = PL_top_env;
1501
4c57ced5 1502 assert(!(PL_in_eval & ~ 0x3F));
13febba5 1503 assert(!(PL_op->op_type & ~0x1FF));
4c57ced5 1504 cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
13febba5
DM
1505}
1506
1507
1508PERL_STATIC_INLINE void
1509S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
1510{
1511 SV *sv;
1512
1513 PERL_ARGS_ASSERT_CX_POPEVAL;
1514 assert(CxTYPE(cx) == CXt_EVAL);
1515
1516 PL_in_eval = CxOLD_IN_EVAL(cx);
4c57ced5 1517 assert(!(PL_in_eval & 0xc0));
13febba5
DM
1518 PL_eval_root = cx->blk_eval.old_eval_root;
1519 sv = cx->blk_eval.cur_text;
4c57ced5 1520 if (sv && CxEVAL_TXT_REFCNTED(cx)) {
13febba5
DM
1521 cx->blk_eval.cur_text = NULL;
1522 SvREFCNT_dec_NN(sv);
1523 }
1524
1525 sv = cx->blk_eval.old_namesv;
2a1e0dfe
DM
1526 if (sv) {
1527 cx->blk_eval.old_namesv = NULL;
1528 SvREFCNT_dec_NN(sv);
1529 }
13febba5 1530}
6a7d52cc 1531
a73d8813 1532
d1b6bf72
DM
1533/* push a plain loop, i.e.
1534 * { block }
1535 * while (cond) { block }
1536 * for (init;cond;continue) { block }
1537 * This loop can be last/redo'ed etc.
1538 */
1539
1540PERL_STATIC_INLINE void
1541S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
1542{
1543 PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
1544 cx->blk_loop.my_op = cLOOP;
1545}
1546
1547
1548/* push a true for loop, i.e.
1549 * for var (list) { block }
1550 */
1551
1552PERL_STATIC_INLINE void
1553S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
1554{
1555 PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
1556
1557 /* this one line is common with cx_pushloop_plain */
1558 cx->blk_loop.my_op = cLOOP;
1559
1560 cx->blk_loop.itervar_u.svp = (SV**)itervarp;
1561 cx->blk_loop.itersave = itersave;
1562#ifdef USE_ITHREADS
1563 cx->blk_loop.oldcomppad = PL_comppad;
1564#endif
1565}
1566
1567
1568/* pop all loop types, including plain */
1569
1570PERL_STATIC_INLINE void
1571S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
1572{
1573 PERL_ARGS_ASSERT_CX_POPLOOP;
1574
1575 assert(CxTYPE_is_LOOP(cx));
1576 if ( CxTYPE(cx) == CXt_LOOP_ARY
1577 || CxTYPE(cx) == CXt_LOOP_LAZYSV)
1578 {
1579 /* Free ary or cur. This assumes that state_u.ary.ary
1580 * aligns with state_u.lazysv.cur. See cx_dup() */
1581 SV *sv = cx->blk_loop.state_u.lazysv.cur;
1582 cx->blk_loop.state_u.lazysv.cur = NULL;
1583 SvREFCNT_dec_NN(sv);
1584 if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
1585 sv = cx->blk_loop.state_u.lazysv.end;
1586 cx->blk_loop.state_u.lazysv.end = NULL;
1587 SvREFCNT_dec_NN(sv);
1588 }
1589 }
1590 if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
1591 SV *cursv;
1592 SV **svp = (cx)->blk_loop.itervar_u.svp;
1593 if ((cx->cx_type & CXp_FOR_GV))
1594 svp = &GvSV((GV*)svp);
1595 cursv = *svp;
1596 *svp = cx->blk_loop.itersave;
1597 cx->blk_loop.itersave = NULL;
1598 SvREFCNT_dec(cursv);
1599 }
1600}
1601
2a7b7c61
DM
1602
1603PERL_STATIC_INLINE void
1604S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
1605{
1606 PERL_ARGS_ASSERT_CX_PUSHWHEN;
1607
1608 cx->blk_givwhen.leave_op = cLOGOP->op_other;
1609}
1610
1611
1612PERL_STATIC_INLINE void
1613S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
1614{
1615 PERL_ARGS_ASSERT_CX_POPWHEN;
1616 assert(CxTYPE(cx) == CXt_WHEN);
1617
1618 PERL_UNUSED_ARG(cx);
59a14f30 1619 PERL_UNUSED_CONTEXT;
2a7b7c61
DM
1620 /* currently NOOP */
1621}
1622
1623
1624PERL_STATIC_INLINE void
1625S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
1626{
1627 PERL_ARGS_ASSERT_CX_PUSHGIVEN;
1628
1629 cx->blk_givwhen.leave_op = cLOGOP->op_other;
1630 cx->blk_givwhen.defsv_save = orig_defsv;
1631}
1632
1633
1634PERL_STATIC_INLINE void
1635S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
1636{
1637 SV *sv;
1638
1639 PERL_ARGS_ASSERT_CX_POPGIVEN;
1640 assert(CxTYPE(cx) == CXt_GIVEN);
1641
1642 sv = GvSV(PL_defgv);
1643 GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
1644 cx->blk_givwhen.defsv_save = NULL;
1645 SvREFCNT_dec(sv);
1646}
1647
ec2c235b
KW
1648/* ------------------ util.h ------------------------------------------- */
1649
1650/*
1651=head1 Miscellaneous Functions
1652
1653=for apidoc foldEQ
1654
1655Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1656same
1657case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes
1658match themselves and their opposite case counterparts. Non-cased and non-ASCII
1659range bytes match only themselves.
1660
1661=cut
1662*/
1663
1664PERL_STATIC_INLINE I32
1665Perl_foldEQ(const char *s1, const char *s2, I32 len)
1666{
1667 const U8 *a = (const U8 *)s1;
1668 const U8 *b = (const U8 *)s2;
1669
1670 PERL_ARGS_ASSERT_FOLDEQ;
1671
1672 assert(len >= 0);
1673
1674 while (len--) {
1675 if (*a != *b && *a != PL_fold[*b])
1676 return 0;
1677 a++,b++;
1678 }
1679 return 1;
1680}
1681
0f9cb40c 1682PERL_STATIC_INLINE I32
ec2c235b
KW
1683Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
1684{
1685 /* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on
1686 * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
1687 * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor
1688 * does it check that the strings each have at least 'len' characters */
1689
1690 const U8 *a = (const U8 *)s1;
1691 const U8 *b = (const U8 *)s2;
1692
1693 PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
1694
1695 assert(len >= 0);
1696
1697 while (len--) {
1698 if (*a != *b && *a != PL_fold_latin1[*b]) {
1699 return 0;
1700 }
1701 a++, b++;
1702 }
1703 return 1;
1704}
1705
1706/*
1707=for apidoc foldEQ_locale
1708
1709Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1710same case-insensitively in the current locale; false otherwise.
1711
1712=cut
1713*/
1714
0f9cb40c 1715PERL_STATIC_INLINE I32
ec2c235b
KW
1716Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
1717{
1718 dVAR;
1719 const U8 *a = (const U8 *)s1;
1720 const U8 *b = (const U8 *)s2;
1721
1722 PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
1723
1724 assert(len >= 0);
1725
1726 while (len--) {
1727 if (*a != *b && *a != PL_fold_locale[*b])
1728 return 0;
1729 a++,b++;
1730 }
1731 return 1;
1732}
1733
ed382232 1734/*
c8028aa6
TC
1735 * ex: set ts=8 sts=4 sw=4 et:
1736 */