const UV total_elems = paren_elems_to_push + REGCP_OTHER_ELEMS;
const UV elems_shifted = total_elems << SAVE_TIGHT_SHIFT;
I32 p;
- GET_RE_DEBUG_FLAGS_DECL;
+ DECLARE_AND_GET_RE_DEBUG_FLAGS;
PERL_ARGS_ASSERT_REGCPPUSH;
{
UV i;
U32 paren;
- GET_RE_DEBUG_FLAGS_DECL;
+ DECLARE_AND_GET_RE_DEBUG_FLAGS;
PERL_ARGS_ASSERT_REGCPPOP;
* Ideally this could be replaced by a just an array of function pointers
* to the C library functions that implement the macros this calls.
* However, to compile, the precise function signatures are required, and
- * these may vary from platform to to platform. To avoid having to figure
+ * these may vary from platform to platform. To avoid having to figure
* out what those all are on each platform, I (khw) am using this method,
* which adds an extra layer of function call overhead (unless the C
* optimizer strips it away). But we don't particularly care about
* rules, ignoring any locale. So use the Unicode function if this class
* requires an inversion list, and use the Unicode macro otherwise. */
- dVAR;
PERL_ARGS_ASSERT_ISFOO_UTF8_LC;
RXi_GET_DECL(prog,progi);
regmatch_info reginfo_buf; /* create some info to pass to find_byclass */
regmatch_info *const reginfo = ®info_buf;
- GET_RE_DEBUG_FLAGS_DECL;
+ DECLARE_AND_GET_RE_DEBUG_FLAGS;
PERL_ARGS_ASSERT_RE_INTUIT_START;
PERL_UNUSED_ARG(flags);
/* Like FBC_UTF8_A, but TEST_UV is a macro which takes a UV as its input, and
* TEST_UTF8 is a macro that for the same input code points returns identically
- * to TEST_UV, but takes a pointer to a UTF-8 encoded string instead */
+ * to TEST_UV, but takes a pointer to a UTF-8 encoded string instead (and an
+ * end pointer as well) */
#define FBC_UTF8(TEST_UV, TEST_UTF8, IF_SUCCESS, IF_FAIL) \
if (s == reginfo->strbeg) { \
tmp = '\n'; \
S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
const char *strend, regmatch_info *reginfo)
{
- dVAR;
/* TRUE if x+ need not match at just the 1st pos of run of x's */
const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
U8 *bitmap=NULL;
- GET_RE_DEBUG_FLAGS_DECL;
+ DECLARE_AND_GET_RE_DEBUG_FLAGS;
/* We can't just allocate points here. We need to wrap it in
* an SV so it gets freed properly if there is a croak while
regmatch_info *const reginfo = ®info_buf;
regexp_paren_pair *swap = NULL;
I32 oldsave;
- GET_RE_DEBUG_FLAGS_DECL;
+ DECLARE_AND_GET_RE_DEBUG_FLAGS;
PERL_ARGS_ASSERT_REGEXEC_FLAGS;
PERL_UNUSED_ARG(data);
if (!startpos ||
((flags & REXEC_FAIL_ON_UNDERFLOW) && startpos < stringarg))
{
- DEBUG_r(Perl_re_printf( aTHX_
+ DEBUG_GPOS_r(Perl_re_printf( aTHX_
"fail: ganch-gofs before earliest possible start\n"));
return 0;
}
minlen = prog->minlen;
if ((startpos + minlen) > strend || startpos < strbeg) {
- DEBUG_r(Perl_re_printf( aTHX_
- "Regex match can't succeed, so not even tried\n"));
+ DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
+ "Regex match can't succeed, so not even tried\n"));
return 0;
}
to_utf8_substr(prog);
}
ch = SvPVX_const(prog->anchored_utf8)[0];
- REXEC_FBC_SCAN(0, /* 0=>not-utf8 */
+ REXEC_FBC_SCAN(1, /* 1=>utf8 */
if (*s == ch) {
DEBUG_EXECUTE_r( did_match = 1 );
if (regtry(reginfo, &s)) goto got_it;
U32 depth = 0; /* used by REGCP_SET */
#endif
RXi_GET_DECL(prog,progi);
- GET_RE_DEBUG_FLAGS_DECL;
+ DECLARE_AND_GET_RE_DEBUG_FLAGS;
PERL_ARGS_ASSERT_REGTRY;
* to/from code points */
bool utf8_has_been_setup = FALSE;
- dVAR;
U8 *pat = (U8*)STRING(text_node);
U8 folded[UTF8_MAX_FOLD_CHAR_EXPAND * UTF8_MAXBYTES_CASE + 1] = { '\0' };
}
}
else if (c1 > 255) {
- const unsigned int * remaining_folds;
- unsigned int first_fold;
+ const U32 * remaining_folds;
+ U32 first_fold;
/* Look up what code points (besides c1) fold to c1; e.g.,
* [ 'K', KELVIN_SIGN ] both fold to 'k'. */
}
while (prev == GCB_Extend);
- return prev != GCB_XPG_XX;
+ return prev != GCB_ExtPict_XX;
}
default:
STATIC GCB_enum
S_backup_one_GCB(pTHX_ const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
{
- dVAR;
GCB_enum gcb;
PERL_ARGS_ASSERT_BACKUP_ONE_GCB;
STATIC LB_enum
S_advance_one_LB(pTHX_ U8 ** curpos, const U8 * const strend, const bool utf8_target)
{
- dVAR;
LB_enum lb;
STATIC LB_enum
S_backup_one_LB(pTHX_ const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
{
- dVAR;
LB_enum lb;
PERL_ARGS_ASSERT_BACKUP_ONE_LB;
STATIC SB_enum
S_advance_one_SB(pTHX_ U8 ** curpos, const U8 * const strend, const bool utf8_target)
{
- dVAR;
SB_enum sb;
PERL_ARGS_ASSERT_ADVANCE_ONE_SB;
STATIC SB_enum
S_backup_one_SB(pTHX_ const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
{
- dVAR;
SB_enum sb;
PERL_ARGS_ASSERT_BACKUP_ONE_SB;
const bool utf8_target,
const bool skip_Extend_Format)
{
- dVAR;
WB_enum wb;
PERL_ARGS_ASSERT_ADVANCE_ONE_WB;
STATIC WB_enum
S_backup_one_WB(pTHX_ WB_enum * previous, const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
{
- dVAR;
WB_enum wb;
PERL_ARGS_ASSERT_BACKUP_ONE_WB;
rest of the pattern. Variable and state names reflect this convention.
The states in the main switch are the union of ops and failure/success of
-substates associated with with that op. For example, IFMATCH is the op
+substates associated with that op. For example, IFMATCH is the op
that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
successfully matched A and IFMATCH_A_fail is a state saying that we have
STATIC SSize_t
S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
{
- dVAR;
const bool utf8_target = reginfo->is_utf8_target;
const U32 uniflags = UTF8_ALLOW_DEFAULT;
REGEXP *rex_sv = reginfo->prog;
#endif
#ifdef DEBUGGING
- GET_RE_DEBUG_FLAGS_DECL;
+ DECLARE_AND_GET_RE_DEBUG_FLAGS;
#endif
/* protect against undef(*^R) */
rex->recurse_locinput[arg]= locinput;
DEBUG_r({
- GET_RE_DEBUG_FLAGS_DECL;
+ DECLARE_AND_GET_RE_DEBUG_FLAGS;
DEBUG_STACK_r({
Perl_re_exec_indentf( aTHX_
"entering GOSUB, prev_recurse_locinput=%p recurse_locinput[%d]=%p\n",
/* NOTREACHED */
case EVAL: /* /(?{...})B/ /(??{A})B/ and /(?(?{...})X|Y)B/ */
- if (cur_eval && cur_eval->locinput==locinput) {
+ if (logical == 2 && cur_eval && cur_eval->locinput==locinput) {
if ( ++nochange_depth > max_nochange_depth )
Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
} else {
/* push a new regex state, then continue at scan */
{
regmatch_state *newst;
+ DECLARE_AND_GET_RE_DEBUG_FLAGS;
- DEBUG_STACK_r({
+ DEBUG_r( /* DEBUG_STACK_r */
+ if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_EXTRA_STACK)) {
regmatch_state *cur = st;
regmatch_state *curyes = yes_state;
U32 i;
if (curyes == cur)
curyes = cur->u.yes.prev_yes_state;
}
- } else
+ } else {
DEBUG_STATE_pp("push")
- );
+ });
depth++;
st->locinput = locinput;
st->loceol = loceol;
S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
char * loceol, regmatch_info *const reginfo, I32 max _pDEPTH)
{
- dVAR;
char *scan; /* Pointer to current position in target string */
I32 c;
char *this_eol = loceol; /* potentially adjusted version. */
*startposp = scan;
DEBUG_r({
- GET_RE_DEBUG_FLAGS_DECL;
+ DECLARE_AND_GET_RE_DEBUG_FLAGS;
DEBUG_EXECUTE_r({
SV * const prop = sv_newmortal();
regprop(prog, prop, p, reginfo, NULL);
STATIC bool
S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const p, const U8* const p_end, const bool utf8_target)
{
- dVAR;
const char flags = (inRANGE(OP(n), ANYOFH, ANYOFHs))
? 0
: ANYOF_FLAGS(n);
}
else if (flags & ANYOF_LOCALE_FLAGS) {
if ( (flags & ANYOFL_FOLD)
- && c < sizeof(PL_fold_locale)
+ && c < 256
&& ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
{
match = TRUE;
&& IN_UTF8_CTYPE_LOCALE)))
{
SV* only_utf8_locale = NULL;
- SV * const definition = _get_regclass_nonbitmap_data(prog, n, TRUE,
- 0, &only_utf8_locale, NULL);
+ SV * const definition =
+#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
+ get_regclass_nonbitmap_data(prog, n, TRUE, 0,
+ &only_utf8_locale, NULL);
+#else
+ get_re_gclass_nonbitmap_data(prog, n, TRUE, 0,
+ &only_utf8_locale, NULL);
+#endif
if (definition) {
U8 utf8_buffer[2];
U8 * utf8_p;
/* this regexp is also owned by the new PL_reg_curpm, which
will try to free it. */
av_push(PL_regex_padav, repointer);
- PL_reg_curpm->op_pmoffset = av_tindex(PL_regex_padav);
+ PL_reg_curpm->op_pmoffset = av_top_index(PL_regex_padav);
PL_regex_pad = AvARRAY(PL_regex_padav);
}
#endif
* so code using it would then break), and there has to be a GCB break
* before and after the character. */
- dVAR;
GCB_enum cp_gcb_val, prev_cp_gcb_val, next_cp_gcb_val;
const U8 * prev_cp_start;
}
/*
-=head1 Unicode Support
+=for apidoc_section Unicode Support
=for apidoc isSCRIPT_RUN
* characters for at least one language in the Unicode Common Locale Data
* Repository [CLDR]. */
- dVAR;
/* Things that match /\d/u */
SV * decimals_invlist = PL_XPosix_ptrs[_CC_DIGIT];
/* If is within the range [+0 .. +9] of the script's zero, it also is a
* digit in that script. We can skip the rest of this code for this
* character. */
- if (UNLIKELY( zero_of_run
- && cp >= zero_of_run
- && cp - zero_of_run <= 9))
- {
+ if (UNLIKELY(zero_of_run && withinCOUNT(cp, zero_of_run, 9))) {
continue;
}
* several scripts, and the intersection is not empty. However, if the
* character is a decimal digit, it could still mean failure if it is
* from the wrong sequence of 10. So, we need to look at if it's a
- * digit. We've already handled the 10 decimal digits, and the next
+ * digit. We've already handled the 10 digits [0-9], and the next
* lowest one is this one: */
if (cp < FIRST_NON_ASCII_DECIMAL_DIGIT) {
continue; /* Not a digit; this character is part of the run */
if ( script_of_char >= 0
&& (zero_of_char = script_zeros[script_of_char]))
{
- if ( cp < zero_of_char
- || cp > zero_of_char + 9)
- {
+ if (! withinCOUNT(cp, zero_of_char, 9)) {
continue; /* Not a digit; this character is part of the run
*/
}