+
+
+
+ /*
+ traverse the TRIE keeping track of all accepting states
+ we transition through until we get to a failing node.
+
+ we use two slightly different pieces of code to handle
+ the traversal depending on whether its case sensitive or
+ not. we reuse the accept code however. (this should probably
+ be turned into a macro.)
+
+ */
+ case TRIEF:
+ case TRIEFL:
+ {
+ U8 *uc = ( U8* )locinput;
+ U32 state = 1;
+ U16 charid = 0;
+ U32 base = 0;
+ UV uvc = 0;
+ STRLEN len = 0;
+ STRLEN foldlen = 0;
+ U8 *uscan = (U8*)NULL;
+ STRLEN bufflen=0;
+ accepted = 0;
+
+ trie = (reg_trie_data*)PL_regdata->data[ ARG( scan ) ];
+
+ while ( state && uc <= (U8*)PL_regeol ) {
+
+ TRIE_CHECK_STATE_IS_ACCEPTING;
+
+ base = trie->states[ state ].trans.base;
+
+ DEBUG_TRIE_EXECUTE_r(
+ PerlIO_printf( Perl_debug_log,
+ "%*s %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ",
+ REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
+ (UV)state, (UV)base, (UV)accepted );
+ );
+
+ if ( base ) {
+
+ if ( do_utf8 ) {
+ if ( foldlen>0 ) {
+ uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
+ foldlen -= len;
+ uscan += len;
+ len=0;
+ } else {
+ U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
+ uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
+ uvc = to_uni_fold( uvc, foldbuf, &foldlen );
+ foldlen -= UNISKIP( uvc );
+ uscan = foldbuf + UNISKIP( uvc );
+ }
+ } else {
+ uvc = (UV)*uc;
+ len = 1;
+ }
+
+ TRIE_HANDLE_CHAR;
+
+ } else {
+ state = 0;
+ }
+ DEBUG_TRIE_EXECUTE_r(
+ PerlIO_printf( Perl_debug_log,
+ "Charid:%3x CV:%4"UVxf" After State: %4"UVxf"%s\n",
+ charid, uvc, (UV)state, PL_colors[5] );
+ );
+ }
+ if ( !accepted ) {
+ sayNO;
+ } else {
+ goto TrieAccept;
+ }
+ }
+ /* unreached codepoint: we jump into the middle of the next case
+ from previous if blocks */
+ case TRIE:
+ {
+ U8 *uc = (U8*)locinput;
+ U32 state = 1;
+ U16 charid = 0;
+ U32 base = 0;
+ UV uvc = 0;
+ STRLEN len = 0;
+ STRLEN bufflen = 0;
+ accepted = 0;
+
+ trie = (reg_trie_data*)PL_regdata->data[ ARG( scan ) ];
+
+ while ( state && uc <= (U8*)PL_regeol ) {
+
+ TRIE_CHECK_STATE_IS_ACCEPTING;
+
+ base = trie->states[ state ].trans.base;
+
+ DEBUG_TRIE_EXECUTE_r(
+ PerlIO_printf( Perl_debug_log,
+ "%*s %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ",
+ REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
+ (UV)state, (UV)base, (UV)accepted );
+ );
+
+ if ( base ) {
+
+ if ( do_utf8 ) {
+ uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
+ } else {
+ uvc = (U32)*uc;
+ len = 1;
+ }
+
+ TRIE_HANDLE_CHAR;
+
+ } else {
+ state = 0;
+ }
+ DEBUG_TRIE_EXECUTE_r(
+ PerlIO_printf( Perl_debug_log,
+ "Charid:%3x CV:%4"UVxf" After State: %4"UVxf"%s\n",
+ charid, uvc, (UV)state, PL_colors[5] );
+ );
+ }
+ if ( !accepted ) {
+ sayNO;
+ }
+ }
+
+
+ /*
+ There was at least one accepting state that we
+ transitioned through. Presumably the number of accepting
+ states is going to be low, typically one or two. So we
+ simply scan through to find the one with lowest wordnum.
+ Once we find it, we swap the last state into its place
+ and decrement the size. We then try to match the rest of
+ the pattern at the point where the word ends, if we
+ succeed then we end the loop, otherwise the loop
+ eventually terminates once all of the accepting states
+ have been tried.
+ */
+ TrieAccept:
+ {
+ int gotit = 0;
+
+ if ( accepted == 1 ) {
+ DEBUG_EXECUTE_r({
+ SV **tmp = av_fetch( trie->words, accept_buff[ 0 ].wordnum-1, 0 );
+ PerlIO_printf( Perl_debug_log,
+ "%*s %sonly one match : #%d <%s>%s\n",
+ REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
+ accept_buff[ 0 ].wordnum,
+ tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",
+ PL_colors[5] );
+ });
+ PL_reginput = (char *)accept_buff[ 0 ].endpos;
+ /* in this case we free tmps/leave before we call regmatch
+ as we wont be using accept_buff again. */
+ FREETMPS;
+ LEAVE;
+ gotit = regmatch( scan + NEXT_OFF( scan ) );
+ } else {
+ DEBUG_EXECUTE_r(
+ PerlIO_printf( Perl_debug_log,"%*s %sgot %"IVdf" possible matches%s\n",
+ REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], (IV)accepted,
+ PL_colors[5] );
+ );
+ while ( !gotit && accepted-- ) {
+ U32 best = 0;
+ U32 cur;
+ for( cur = 1 ; cur <= accepted ; cur++ ) {
+ DEBUG_TRIE_EXECUTE_r(
+ PerlIO_printf( Perl_debug_log,
+ "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
+ REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
+ (IV)best, accept_buff[ best ].wordnum, (IV)cur,
+ accept_buff[ cur ].wordnum, PL_colors[5] );
+ );
+
+ if ( accept_buff[ cur ].wordnum < accept_buff[ best ].wordnum )
+ best = cur;
+ }
+ DEBUG_EXECUTE_r({
+ SV **tmp = av_fetch( trie->words, accept_buff[ best ].wordnum - 1, 0 );
+ PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at 0x%p%s\n",
+ REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
+ accept_buff[best].wordnum,
+ tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",scan,
+ PL_colors[5] );
+ });
+ if ( best<accepted ) {
+ reg_trie_accepted tmp = accept_buff[ best ];
+ accept_buff[ best ] = accept_buff[ accepted ];
+ accept_buff[ accepted ] = tmp;
+ best = accepted;
+ }
+ PL_reginput = (char *)accept_buff[ best ].endpos;
+
+ /*
+ as far as I can tell we only need the SAVETMPS/FREETMPS
+ for re's with EVAL in them but I'm leaving them in for
+ all until I can be sure.
+ */
+ SAVETMPS;
+ gotit = regmatch( scan + NEXT_OFF( scan ) ) ;
+ FREETMPS;
+ }
+ FREETMPS;
+ LEAVE;
+ }
+
+ if ( gotit ) {
+ sayYES;
+ } else {
+ sayNO;
+ }
+ }
+ /* unreached codepoint */