locinput = (p); \
SET_nextchr
-
-#define LOAD_UTF8_CHARCLASS(swash_ptr, property_name, invlist) STMT_START { \
- if (!swash_ptr) { \
- U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST; \
- swash_ptr = _core_swash_init("utf8", property_name, &PL_sv_undef, \
- 1, 0, invlist, &flags); \
- assert(swash_ptr); \
- } \
- } STMT_END
-
-/* If in debug mode, we test that a known character properly matches */
-#ifdef DEBUGGING
-# define LOAD_UTF8_CHARCLASS_DEBUG_TEST(swash_ptr, \
- property_name, \
- invlist, \
- utf8_char_in_property) \
- LOAD_UTF8_CHARCLASS(swash_ptr, property_name, invlist); \
- assert(swash_fetch(swash_ptr, (U8 *) utf8_char_in_property, TRUE));
-#else
-# define LOAD_UTF8_CHARCLASS_DEBUG_TEST(swash_ptr, \
- property_name, \
- invlist, \
- utf8_char_in_property) \
- LOAD_UTF8_CHARCLASS(swash_ptr, property_name, invlist)
-#endif
-
#define PLACEHOLDER /* Something for the preprocessor to grab onto */
/* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
case ANYOFM: /* ARG() is the base byte; FLAGS() the mask byte */
/* UTF-8ness doesn't matter, so use 0 */
REXEC_FBC_FIND_NEXT_SCAN(0,
- (char *) find_next_masked((U8 *) s, (U8 *) strend, ARG(c), FLAGS(c)));
+ (char *) find_next_masked((U8 *) s, (U8 *) strend,
+ (U8) ARG(c), FLAGS(c)));
break;
case EXACTFAA_NO_TRIE: /* This node only generated for non-utf8 patterns */
else { /* an EXACTFish node which doesn't begin with a multi-char fold */
c1 = is_utf8_pat ? valid_utf8_to_uvchr(pat, NULL) : *pat;
if (c1 > 255) {
- /* Load the folds hash, if not already done */
- SV** listp;
- if (! PL_utf8_foldclosures) {
- _load_PL_utf8_foldclosures();
+ const unsigned int * remaining_folds_to_list;
+ unsigned int first_folds_to;
+
+ /* Look up what code points (besides c1) fold to c1; e.g.,
+ * [ 'K', KELVIN_SIGN ] both fold to 'k'. */
+ Size_t folds_to_count = _inverse_folds(c1,
+ &first_folds_to,
+ &remaining_folds_to_list);
+ if (folds_to_count == 0) {
+ c2 = c1; /* there is only a single character that could
+ match */
}
-
- /* The fold closures data structure is a hash with the keys
- * being the UTF-8 of every character that is folded to, like
- * 'k', and the values each an array of all code points that
- * fold to its key. e.g. [ 'k', 'K', KELVIN_SIGN ].
- * Multi-character folds are not included */
- if ((! (listp = hv_fetch(PL_utf8_foldclosures,
- (char *) pat,
- UTF8SKIP(pat),
- FALSE))))
- {
- /* Not found in the hash, therefore there are no folds
- * containing it, so there is only a single character that
- * could match */
- c2 = c1;
+ else if (folds_to_count != 1) {
+ /* If there aren't exactly two folds to this (itself and
+ * another), it is outside the scope of this function */
+ use_chrtest_void = TRUE;
}
- else { /* Does participate in folds */
- AV* list = (AV*) *listp;
- if (av_tindex_skip_len_mg(list) != 1) {
-
- /* If there aren't exactly two folds to this, it is
- * outside the scope of this function */
- use_chrtest_void = TRUE;
- }
- else { /* There are two. Get them */
- SV** c_p = av_fetch(list, 0, FALSE);
- if (c_p == NULL) {
- Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
- }
- c1 = SvUV(*c_p);
-
- c_p = av_fetch(list, 1, FALSE);
- if (c_p == NULL) {
- Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
- }
- c2 = SvUV(*c_p);
-
- /* Folds that cross the 255/256 boundary are forbidden
- * if EXACTFL (and isnt a UTF8 locale), or EXACTFAA and
- * one is ASCIII. Since the pattern character is above
- * 255, and its only other match is below 256, the only
- * legal match will be to itself. We have thrown away
- * the original, so have to compute which is the one
- * above 255. */
- if ((c1 < 256) != (c2 < 256)) {
- if ((OP(text_node) == EXACTFL
- && ! IN_UTF8_CTYPE_LOCALE)
- || ((OP(text_node) == EXACTFAA
- || OP(text_node) == EXACTFAA_NO_TRIE)
- && (isASCII(c1) || isASCII(c2))))
- {
- if (c1 < 256) {
- c1 = c2;
- }
- else {
- c2 = c1;
- }
- }
- }
+ else { /* There are two. We already have one, get the other */
+ c2 = first_folds_to;
+
+ /* Folds that cross the 255/256 boundary are forbidden if
+ * EXACTFL (and isnt a UTF8 locale), or EXACTFAA and one is
+ * ASCIII. The only other match to c1 is c2, and since c1
+ * is above 255, c2 better be as well under these
+ * circumstances. If it isn't, it means the only legal
+ * match of c1 is itself. */
+ if ( c2 < 256
+ && ( ( OP(text_node) == EXACTFL
+ && ! IN_UTF8_CTYPE_LOCALE)
+ || (( OP(text_node) == EXACTFAA
+ || OP(text_node) == EXACTFAA_NO_TRIE)
+ && (isASCII(c1) || isASCII(c2)))))
+ {
+ c2 = c1;
}
}
}
*/
if(!NEXTCHR_IS_EOS && !ANYOF_BITMAP_TEST(scan, nextchr)) {
DEBUG_EXECUTE_r(
- Perl_re_exec_indentf( aTHX_ "%sfailed to match trie start class...%s\n",
+ Perl_re_exec_indentf( aTHX_ "%sTRIE: failed to match trie start class...%s\n",
depth, PL_colors[4], PL_colors[5])
);
sayNO_SILENT;
if (scan->flags == EXACTL || scan->flags == EXACTFLU8) {
_CHECK_AND_WARN_PROBLEMATIC_LOCALE;
if (utf8_target
- && nextchr >= 0 /* guard against negative EOS value in nextchr */
+ && ! NEXTCHR_IS_EOS
&& UTF8_IS_ABOVE_LATIN1(nextchr)
&& scan->flags == EXACTL)
{
{
if (trie->states[ state ].wordnum) {
DEBUG_EXECUTE_r(
- Perl_re_exec_indentf( aTHX_ "%smatched empty string...%s\n",
+ Perl_re_exec_indentf( aTHX_ "%sTRIE: matched empty string...%s\n",
depth, PL_colors[4], PL_colors[5])
);
if (!trie->jump)
break;
} else {
DEBUG_EXECUTE_r(
- Perl_re_exec_indentf( aTHX_ "%sfailed to match trie start class...%s\n",
+ Perl_re_exec_indentf( aTHX_ "%sTRIE: failed to match trie start class...%s\n",
depth, PL_colors[4], PL_colors[5])
);
sayNO_SILENT;
DUMP_EXEC_POS( (char *)uc, scan, utf8_target, depth );
/* HERE */
PerlIO_printf( Perl_debug_log,
- "%*s%sState: %4" UVxf " Accepted: %c ",
+ "%*s%sTRIE: State: %4" UVxf " Accepted: %c ",
INDENT_CHARS(depth), "", PL_colors[4],
(UV)state, (accepted ? 'Y' : 'N'));
});
}
DEBUG_TRIE_EXECUTE_r(
Perl_re_printf( aTHX_
- "Charid:%3x CP:%4" UVxf " After State: %4" UVxf "%s\n",
+ "TRIE: Charid:%3x CP:%4" UVxf " After State: %4" UVxf "%s\n",
charid, uvc, (UV)state, PL_colors[5] );
);
}
}
DEBUG_EXECUTE_r(
- Perl_re_exec_indentf( aTHX_ "%sgot %" IVdf " possible matches%s\n",
+ Perl_re_exec_indentf( aTHX_ "%sTRIE: got %" IVdf " possible matches%s\n",
depth,
PL_colors[4], (IV)ST.accepted, PL_colors[5] );
);
? av_fetch(trie_words, ST.nextword - 1, 0) : NULL;
SV *sv= tmp ? sv_newmortal() : NULL;
- Perl_re_exec_indentf( aTHX_ "%sonly one match left, short-circuiting: #%d <%s>%s\n",
+ Perl_re_exec_indentf( aTHX_ "%sTRIE: only one match left, short-circuiting: #%d <%s>%s\n",
depth, PL_colors[4],
ST.nextword,
tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
rei = RXi_GET(re);
DEBUG_EXECUTE_r(
debug_start_match(re_sv, utf8_target, locinput,
- reginfo->strend, "Matching embedded");
+ reginfo->strend, "EVAL/GOSUB: Matching embedded");
);
startpoint = rei->program + 1;
EVAL_CLOSE_PAREN_CLEAR(st); /* ST.close_paren = 0;
if (n > maxopenparen)
maxopenparen = n;
DEBUG_BUFFERS_r(Perl_re_exec_indentf( aTHX_
- "rex=0x%" UVxf " offs=0x%" UVxf ": \\%" UVuf ": set %" IVdf " tmp; maxopenparen=%" UVuf "\n",
+ "OPEN: rex=0x%" UVxf " offs=0x%" UVxf ": \\%" UVuf ": set %" IVdf " tmp; maxopenparen=%" UVuf "\n",
depth,
PTR2UV(rex),
PTR2UV(rex->offs),
rex->offs[n].start = rex->offs[n].start_tmp; \
rex->offs[n].end = locinput - reginfo->strbeg; \
DEBUG_BUFFERS_r(Perl_re_exec_indentf( aTHX_ \
- "rex=0x%" UVxf " offs=0x%" UVxf ": \\%" UVuf ": set %" IVdf "..%" IVdf "\n", \
+ "CLOSE: rex=0x%" UVxf " offs=0x%" UVxf ": \\%" UVuf ": set %" IVdf "..%" IVdf "\n", \
depth, \
PTR2UV(rex), \
PTR2UV(rex->offs), \
ST.cache_mask = 0;
- DEBUG_EXECUTE_r( Perl_re_exec_indentf( aTHX_ "whilem: matched %ld out of %d..%d\n",
+ DEBUG_EXECUTE_r( Perl_re_exec_indentf( aTHX_ "WHILEM: matched %ld out of %d..%d\n",
depth, (long)n, min, max)
);
/* If degenerate A matches "", assume A done. */
if (locinput == cur_curlyx->u.curlyx.lastloc) {
- DEBUG_EXECUTE_r( Perl_re_exec_indentf( aTHX_ "whilem: empty match detected, trying continuation...\n",
+ DEBUG_EXECUTE_r( Perl_re_exec_indentf( aTHX_ "WHILEM: empty match detected, trying continuation...\n",
depth)
);
goto do_whilem_B_max;
Newxz(aux->poscache, size, char);
}
DEBUG_EXECUTE_r( Perl_re_printf( aTHX_
- "%swhilem: Detected a super-linear match, switching on caching%s...\n",
+ "%sWHILEM: Detected a super-linear match, switching on caching%s...\n",
PL_colors[4], PL_colors[5])
);
}
mask = 1 << (offset % 8);
offset /= 8;
if (reginfo->info_aux->poscache[offset] & mask) {
- DEBUG_EXECUTE_r( Perl_re_exec_indentf( aTHX_ "whilem: (cache) already tried at this position...\n",
+ DEBUG_EXECUTE_r( Perl_re_exec_indentf( aTHX_ "WHILEM: (cache) already tried at this position...\n",
depth)
);
cur_curlyx->u.curlyx.count--;
case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
REGCP_UNWIND(ST.lastcp);
regcppop(rex, &maxopenparen); /* Restore some previous $<digit>s? */
- DEBUG_EXECUTE_r(Perl_re_exec_indentf( aTHX_ "whilem: failed, trying continuation...\n",
+ DEBUG_EXECUTE_r(Perl_re_exec_indentf( aTHX_ "WHILEM: failed, trying continuation...\n",
depth)
);
do_whilem_B_max:
CACHEsayNO;
}
- DEBUG_EXECUTE_r(Perl_re_exec_indentf( aTHX_ "trying longer...\n", depth)
+ DEBUG_EXECUTE_r(Perl_re_exec_indentf( aTHX_ "WHILEM: B min fail: trying longer...\n", depth)
);
/* Try grabbing another A and see if it helps. */
cur_curlyx->u.curlyx.lastloc = locinput;
st->u.eval.prev_eval = cur_eval;
cur_eval = CUR_EVAL.prev_eval;
DEBUG_EXECUTE_r(
- Perl_re_exec_indentf( aTHX_ "EVAL trying tail ... (cur_eval=%p)\n",
+ Perl_re_exec_indentf( aTHX_ "END: EVAL trying tail ... (cur_eval=%p)\n",
depth, cur_eval););
if ( nochange_depth )
nochange_depth--;
if (locinput < reginfo->till) {
DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
- "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
+ "%sEND: Match possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
PL_colors[4],
(long)(locinput - startpos),
(long)(reginfo->till - startpos),
case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
DEBUG_EXECUTE_r(
- Perl_re_exec_indentf( aTHX_ "%ssubpattern success...%s\n",
+ Perl_re_exec_indentf( aTHX_ "%sSUCCEED: subpattern success...%s\n",
depth, PL_colors[4], PL_colors[5]));
sayYES; /* Success! */
sv_commit = ST.mark_name;
DEBUG_EXECUTE_r({
- Perl_re_exec_indentf( aTHX_ "%ssetting cutpoint to mark:%" SVf "...%s\n",
+ Perl_re_exec_indentf( aTHX_ "%sMARKPOINT: next fail: setting cutpoint to mark:%" SVf "...%s\n",
depth,
PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
});