#endif
-#ifndef PERL_IN_REGEXEC_C /* Keep these around for that file */
+ /* Keep these around for these files */
+#if ! defined(PERL_IN_REGEXEC_C) && ! defined(PERL_IN_UTF8_C)
# undef PERL_WORDSIZE
# undef PERL_COUNT_MULTIPLIER
# undef PERL_WORD_BOUNDARY_MASK
incomplete_char_action) \
STMT_START { \
const U8 * s = s0; \
+ const U8 * e_ = e; \
UV state = 0; \
\
- PERL_NON_CORE_CHECK_EMPTY(s,e); \
+ PERL_NON_CORE_CHECK_EMPTY(s, e_); \
\
do { \
state = dfa_tab[256 + state + dfa_tab[*s]]; \
if (UNLIKELY(state == 1)) { /* Rejecting state */ \
reject_action; \
} \
- } while (s < e); \
+ } while (s < e_); \
\
/* Here, dropped out of loop before end-of-char */ \
incomplete_char_action; \
=for apidoc utf8_hop
Return the UTF-8 pointer C<s> displaced by C<off> characters, either
-forward or backward.
+forward (if C<off> is positive) or backward (if negative). C<s> does not need
+to be pointing to the starting byte of a character. If it isn't, one count of
+C<off> will be used up to get to the start of the next character for forward
+hops, and to the start of the current character for negative ones.
-WARNING: do not use the following unless you *know* C<off> is within
-the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
-on the first byte of character or just after the last byte of a character.
+WARNING: Prefer L</utf8_hop_safe> to this one.
+
+Do NOT use this function unless you B<know> C<off> is within
+the UTF-8 data pointed to by C<s> B<and> that on entry C<s> is aligned
+on the first byte of a character or just after the last byte of a character.
=cut
*/
PERL_ARGS_ASSERT_UTF8_HOP;
/* Note: cannot use UTF8_IS_...() too eagerly here since e.g
- * the bitops (especially ~) can create illegal UTF-8.
+ * the XXX bitops (especially ~) can create illegal UTF-8.
* In other words: in Perl UTF-8 is not just for Unicode. */
- if (off >= 0) {
+ if (off > 0) {
+
+ /* Get to next non-continuation byte */
+ if (UNLIKELY(UTF8_IS_CONTINUATION(*s))) {
+ do {
+ s++;
+ }
+ while (UTF8_IS_CONTINUATION(*s));
+ off--;
+ }
+
while (off--)
s += UTF8SKIP(s);
}
s--;
}
}
+
GCC_DIAG_IGNORE(-Wcast-qual)
return (U8 *)s;
GCC_DIAG_RESTORE
=for apidoc utf8_hop_forward
Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
-forward.
+forward. C<s> does not need to be pointing to the starting byte of a
+character. If it isn't, one count of C<off> will be used up to get to the
+start of the next character.
C<off> must be non-negative.
assert(s <= end);
assert(off >= 0);
+ if (off && UNLIKELY(UTF8_IS_CONTINUATION(*s))) {
+ /* Get to next non-continuation byte */
+ do {
+ s++;
+ }
+ while (UTF8_IS_CONTINUATION(*s));
+ off--;
+ }
+
while (off--) {
STRLEN skip = UTF8SKIP(s);
if ((STRLEN)(end - s) <= skip) {
=for apidoc utf8_hop_back
Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
-backward.
+backward. C<s> does not need to be pointing to the starting byte of a
+character. If it isn't, one count of C<off> will be used up to get to that
+start.
C<off> must be non-positive.
=for apidoc utf8_hop_safe
Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
-either forward or backward.
+either forward or backward. C<s> does not need to be pointing to the starting
+byte of a character. If it isn't, one count of C<off> will be used up to get
+to the start of the next character for forward hops, and to the start of the
+current character for negative ones.
When moving backward it will not move before C<start>.
/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
-#if defined(PERL_IN_PP_C) || defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UNIVERSAL_C)
+#if defined(PERL_IN_PP_C) || defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UNIVERSAL_C)
#define MAX_CHARSET_NAME_LENGTH 2
SvREFCNT_dec(sv);
}
+/*
+=for apidoc newPADxVOP
+
+Constructs, checks and returns an op containing a pad offset. C<type> is
+the opcode, which should be one of C<OP_PADSV>, C<OP_PADAV>, C<OP_PADHV>
+or C<OP_PADCV>. The returned op will have the C<op_targ> field set by
+the C<padix> argument.
+
+This is convenient when constructing a large optree in nested function
+calls, as it avoids needing to store the pad op directly to set the
+C<op_targ> field as a side-effect. For example
+
+ o = op_append_elem(OP_LINESEQ, o,
+ newPADxVOP(OP_PADSV, 0, padix));
+
+=cut
+*/
+
+PERL_STATIC_INLINE OP *
+Perl_newPADxVOP(pTHX_ I32 type, I32 flags, PADOFFSET padix)
+{
+ PERL_ARGS_ASSERT_NEWPADXVOP;
+
+ assert(type == OP_PADSV || type == OP_PADAV || type == OP_PADHV
+ || type == OP_PADCV);
+ OP *o = newOP(type, flags);
+ o->op_targ = padix;
+ return o;
+}
+
/* ------------------ util.h ------------------------------------------- */
/*
assert(len >= 0);
while (len--) {
- if (*a != *b && *a != PL_fold_locale[*b])
+ if (*a != *b && *a != PL_fold_locale[*b]) {
+ DEBUG_Lv(PerlIO_printf(Perl_debug_log,
+ "%s:%d: Our records indicate %02x is not a fold of %02x"
+ " or its mate %02x\n",
+ __FILE__, __LINE__, *a, *b, PL_fold_locale[*b]));
+
return 0;
+ }
a++,b++;
}
return 1;
}
}
- /* Then each of the three significant characters */
+ /* Then each of the four significant characters */
if (strchr(ret, 'm')) {
*mem_log_meat++ = 'm';
}
if (strchr(ret, 't')) {
*mem_log_meat++ = 't';
}
+ if (strchr(ret, 'c')) {
+ *mem_log_meat++ = 'c';
+ }
*mem_log_meat = '\0';
assert(mem_log_meat < PL_mem_log + sizeof(PL_mem_log));
#endif
+PERL_STATIC_INLINE PADNAME *
+Perl_padname_refcnt_inc(PADNAME *pn)
+{
+ PadnameREFCNT(pn)++;
+ return pn;
+}
+
/*
* ex: set ts=8 sts=4 sw=4 et:
*/