+
+
+
+ /*
+ Traverse the TRIE, recording every accepting state (and the input
+ position at which it was reached) that we transition through, until
+ we hit a failing node or run past PL_regeol. The recorded words are
+ then tried in ascending wordnum order against the rest of the pattern.
+ */
+ case TRIE:
+ case TRIEF:
+ case TRIEFL:
+ {
+ U8 *uc = ( U8* )locinput; /* current read position in the input */
+ U32 state = 1; /* walk starts in state 1; 0 ends the walk */
+ U16 charid = 0; /* id of uvc from charmap/widecharmap; 0 = unmapped */
+ U32 base = 0; /* trans.base of the current state; 0 = no transitions */
+ UV uvc = 0; /* code point (or byte) being looked up */
+ STRLEN len = 0; /* input bytes to advance uc by on this pass */
+ STRLEN foldlen = 0; /* folded bytes still unread in foldbuf */
+ U8 *uscan = (U8*)NULL; /* read cursor into foldbuf */
+ U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ]; /* case scope: uscan points into it across loop passes */
+ STRLEN bufflen=0; /* capacity of accept_buff, in entries */
+ SV *sv_accept_buff = NULL; /* mortal SV whose string buffer backs accept_buff */
+ const enum { trie_plain, trie_utf8, trie_utf8_fold }
+ trie_type = do_utf8 ?
+ (OP(scan) == TRIE ? trie_utf8 : trie_utf8_fold)
+ : trie_plain;
+
+ /* what trie are we using right now */
+ reg_trie_data *trie
+ = (reg_trie_data*)rex->data->data[ ARG( scan ) ];
+ st->u.trie.accepted = 0; /* how many accepting states we have seen */
+ result = 0;
+
+ while ( state && uc <= (U8*)PL_regeol ) { /* <= so a word ending exactly at PL_regeol is still recorded */
+
+ if (trie->states[ state ].wordnum) { /* accepting state: remember it */
+ if (!st->u.trie.accepted ) { /* first hit: open a scope and allocate the buffer */
+ ENTER;
+ SAVETMPS;
+ bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
+ sv_accept_buff=newSV(bufflen *
+ sizeof(reg_trie_accepted) - 1);
+ SvCUR_set(sv_accept_buff,
+ sizeof(reg_trie_accepted));
+ SvPOK_on(sv_accept_buff);
+ sv_2mortal(sv_accept_buff);
+ st->u.trie.accept_buff =
+ (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
+ }
+ else {
+ if (st->u.trie.accepted >= bufflen) { /* full: double the capacity */
+ bufflen *= 2;
+ st->u.trie.accept_buff =(reg_trie_accepted*)
+ SvGROW(sv_accept_buff,
+ bufflen * sizeof(reg_trie_accepted));
+ }
+ SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
+ + sizeof(reg_trie_accepted));
+ }
+ st->u.trie.accept_buff[st->u.trie.accepted].wordnum = trie->states[state].wordnum;
+ st->u.trie.accept_buff[st->u.trie.accepted].endpos = uc;
+ ++st->u.trie.accepted;
+ }
+
+ base = trie->states[ state ].trans.base;
+
+ DEBUG_TRIE_EXECUTE_r(
+ PerlIO_printf( Perl_debug_log,
+ "%*s %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ",
+ REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
+ (UV)state, (UV)base, (UV)st->u.trie.accepted );
+ );
+
+ if ( base ) {
+ switch (trie_type) {
+ case trie_utf8_fold:
+ if ( foldlen>0 ) { /* still draining the fold of the previous input char */
+ uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
+ foldlen -= len;
+ uscan += len;
+ len=0; /* consumed from foldbuf, so do not advance uc */
+ } else {
+ uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
+ uvc = to_uni_fold( uvc, foldbuf, &foldlen );
+ foldlen -= UNISKIP( uvc ); /* first folded char is used now ... */
+ uscan = foldbuf + UNISKIP( uvc ); /* ... the rest is read on later passes */
+ }
+ break;
+ case trie_utf8:
+ uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN,
+ &len, uniflags );
+ break;
+ case trie_plain:
+ uvc = (UV)*uc;
+ len = 1;
+ }
+
+ if (uvc < 256) {
+ charid = trie->charmap[ uvc ];
+ }
+ else {
+ charid = 0;
+ if (trie->widecharmap) { /* hash keyed by the raw bytes of the UV */
+ SV** svpp = (SV**)NULL;
+ svpp = hv_fetch(trie->widecharmap,
+ (char*)&uvc, sizeof(UV), 0);
+ if (svpp)
+ charid = (U16)SvIV(*svpp);
+ }
+ }
+
+ if (charid && /* take the transition only if the slot is in range and owned by this state */
+ (base + charid > trie->uniquecharcount )
+ && (base + charid - 1 - trie->uniquecharcount
+ < trie->lasttrans)
+ && trie->trans[base + charid - 1 -
+ trie->uniquecharcount].check == state)
+ {
+ state = trie->trans[base + charid - 1 -
+ trie->uniquecharcount ].next;
+ }
+ else {
+ state = 0;
+ }
+ uc += len;
+
+ }
+ else {
+ state = 0;
+ }
+ DEBUG_TRIE_EXECUTE_r(
+ PerlIO_printf( Perl_debug_log,
+ "Charid:%3x CV:%4"UVxf" After State: %4"UVxf"%s\n",
+ charid, uvc, (UV)state, PL_colors[5] );
+ );
+ }
+ if (!st->u.trie.accepted )
+ sayNO;
+
+ /*
+ There was at least one accepting state that we
+ transitioned through. Presumably the number of accepting
+ states is going to be low, typically one or two. So we
+ simply scan through to find the one with lowest wordnum.
+ Once we find it, we swap the last state into its place
+ and decrement the size. We then try to match the rest of
+ the pattern at the point where the word ends, if we
+ succeed then we end the loop, otherwise the loop
+ eventually terminates once all of the accepting states
+ have been tried.
+ */
+
+ if ( st->u.trie.accepted == 1 ) {
+ DEBUG_EXECUTE_r({
+ SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ 0 ].wordnum-1, 0 );
+ PerlIO_printf( Perl_debug_log,
+ "%*s %sonly one match : #%d <%s>%s\n",
+ REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
+ st->u.trie.accept_buff[ 0 ].wordnum,
+ tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",
+ PL_colors[5] );
+ });
+ PL_reginput = (char *)st->u.trie.accept_buff[ 0 ].endpos;
+ /* in this case we free tmps/leave before we call regmatch
+ as we won't be using accept_buff again. */
+ FREETMPS;
+ LEAVE;
+ REGMATCH(scan + NEXT_OFF(scan), TRIE1);
+ /*** all unsaved local vars undefined at this point */
+ } else {
+ DEBUG_EXECUTE_r(
+ PerlIO_printf( Perl_debug_log,"%*s %sgot %"IVdf" possible matches%s\n",
+ REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], (IV)st->u.trie.accepted,
+ PL_colors[5] );
+ );
+ while ( !result && st->u.trie.accepted-- ) { /* accepted is now the index of the last untried slot */
+ U32 best = 0;
+ U32 cur;
+ for( cur = 1 ; cur <= st->u.trie.accepted ; cur++ ) { /* find the lowest wordnum still untried */
+ DEBUG_TRIE_EXECUTE_r(
+ PerlIO_printf( Perl_debug_log,
+ "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
+ REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
+ (IV)best, st->u.trie.accept_buff[ best ].wordnum, (IV)cur,
+ st->u.trie.accept_buff[ cur ].wordnum, PL_colors[5] );
+ );
+
+ if (st->u.trie.accept_buff[cur].wordnum <
+ st->u.trie.accept_buff[best].wordnum)
+ best = cur;
+ }
+ DEBUG_EXECUTE_r({
+ reg_trie_data * const trie = (reg_trie_data*)
+ rex->data->data[ARG(scan)]; /* re-fetched: the outer local is not preserved across REGMATCH */
+ SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ best ].wordnum - 1, 0 );
+ PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at 0x%p%s\n",
+ REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
+ st->u.trie.accept_buff[best].wordnum,
+ tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",(void*)scan,
+ PL_colors[5] );
+ });
+ if ( best<st->u.trie.accepted ) { /* move the chosen entry to the last slot, out of the live range */
+ reg_trie_accepted tmp = st->u.trie.accept_buff[ best ];
+ st->u.trie.accept_buff[ best ] = st->u.trie.accept_buff[ st->u.trie.accepted ];
+ st->u.trie.accept_buff[ st->u.trie.accepted ] = tmp;
+ best = st->u.trie.accepted;
+ }
+ PL_reginput = (char *)st->u.trie.accept_buff[ best ].endpos;
+
+ /*
+ as far as I can tell we only need the SAVETMPS/FREETMPS
+ for re's with EVAL in them but I'm leaving them in for
+ all until I can be sure.
+ */
+ SAVETMPS;
+ REGMATCH(scan + NEXT_OFF(scan), TRIE2);
+ /*** all unsaved local vars undefined at this point */
+ FREETMPS;
+ }
+ FREETMPS; /* pairs with the ENTER/SAVETMPS done when the first word was accepted */
+ LEAVE;
+ }
+
+ if (result) {
+ sayYES;
+ } else {
+ sayNO;
+ }
+ }
+ /* unreached codepoint */
+ case EXACT: {
+ char *s = STRING(scan);
+ st->ln = STR_LEN(scan);