+ {
+ # This generates the dfa table for perl extended UTF-8, which accepts
+ # surrogates, non-characters, and accepts start bytes up through FE
+ # (start byte FF has to be handled outside this dfa). The class numbers
+ # for start bytes are constrained so that they can be used as a shift
+ # count for masking off the leading one bits
+ #
+ # The classes are
+ # 00-9F 0 Always legal at start
+ # A0-A1 7 Not legal immediately after start bytes F0 F8 FC FE
+ # A2-A3 8 Not legal immediately after start bytes F0 F8 FC
+ # A4-A7 9 Not legal immediately after start bytes F0 F8
+ # A8-AF 10 Not legal immediately after start bytes F0
+ # B0-BF 11
+ # C0-C4 1 (reject at start)
+ # C5-DF 2
+ # E0 1 (reject at start)
+ # E1-EF 3
+ # F0 12
+ # F1-F7 4
+ # F8 13
+ # F9-FB 5
+ # FC 14
+ # FD 6
+ # FE 15
+ # FF 1 (reject here; handled outside this dfa)
+ #
+ # Here's the I8 for the code points before which overlongs occur:
+ # U+4000: \xF0\xB0\xA0\xA0
+ # U+40000: \xF8\xA8\xA0\xA0\xA0
+ # U+400000: \xFC\xA4\xA0\xA0\xA0\xA0
+ # U+4000000: \xFE\xA2\xA0\xA0\xA0\xA0\xA0
+ #
+ # The first part of the table maps bytes to character classes to reduce
+ # the size of the transition table and create bitmasks.
+ #
+ # The second part is a transition table that maps a combination of a
+ # state of the automaton and a character class to a new state. The
+ # numbering of the original nodes is retained, but some have been split
+ # so that there are new nodes. They mean:
+ # N0 The initial state, and final accepting one.
+ # N1 One continuation byte (A0-BF) left. This is transitioned to
+ # immediately when the start byte indicates a two-byte sequence
+ # N2 Two continuation bytes left.
+ # N3 Three continuation bytes left.
+ # N4 Four continuation bytes left.
+ # N5 Five continuation bytes left.
+ # N6 Start byte is F0. Continuation bytes A[0-F] are illegal
+ # (overlong); the other continuations transition to N2
+ # N7 Start byte is F8. Continuation bytes A[0-7] are illegal
+ # (overlong); the other continuations transition to N3
+ # N8 Start byte is FC. Continuation bytes A[0-3] are illegal
+ # (overlong); the other continuations transition to N4
+ # N9 Start byte is FE. Continuation bytes A[01] are illegal
+ # (overlong); the other continuations transition to N5
+ # 1 Reject. All transitions not mentioned above (except the single
+ # byte ones, as they are always legal) are to this state.
+
+ # Each state's value is the offset of its row within the transition
+ # table, i.e. a multiple of $NUM_CLASSES; the value 1 is never such an
+ # offset, which is why it can stand for "reject".
+ my $NUM_CLASSES = 16;
+ my $N0 = 0;
+ my $N1 = $N0 + $NUM_CLASSES;
+ my $N2 = $N1 + $NUM_CLASSES;
+ my $N3 = $N2 + $NUM_CLASSES;
+ my $N4 = $N3 + $NUM_CLASSES;
+ my $N5 = $N4 + $NUM_CLASSES;
+ my $N6 = $N5 + $NUM_CLASSES;
+ my $N7 = $N6 + $NUM_CLASSES;
+ my $N8 = $N7 + $NUM_CLASSES;
+ my $N9 = $N8 + $NUM_CLASSES;
+
+ my @perl_extended_utf8_dfa;
+
+ # Character class of each byte, indexed by its I8 value
+ my @i8 = (
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 00-0F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 10-1F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 20-2F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 30-3F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 40-4F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 50-5F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 60-6F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 70-7F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 80-8F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 90-9F
+ 7, 7, 8, 8, 9, 9, 9, 9,10,10,10,10,10,10,10,10, # A0-AF
+ 11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11, # B0-BF
+ 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, # C0-CF
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, # D0-DF
+ 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, # E0-EF
+ 12, 4, 4, 4, 4, 4, 4, 4,13, 5, 5, 5,14, 6,15, 1, # F0-FF
+ );
+
+ # Store each class at the position given by @i82utf for that I8 byte.
+ # NOTE(review): @i82utf is defined outside this block; presumably it
+ # maps an I8 byte to the corresponding native byte -- confirm.
+ $perl_extended_utf8_dfa[$i82utf[$_]] = $i8[$_] for (0 .. 255);
+
+ # The transition rows follow the class map, one row per state
+ push @perl_extended_utf8_dfa, (
+ # Class:
+ # 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ 0, 1,$N1,$N2,$N3,$N4,$N5, 1, 1, 1, 1, 1,$N6,$N7,$N8,$N9, # N0
+ 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, # N1
+ 1, 1, 1, 1, 1, 1, 1,$N1,$N1,$N1,$N1,$N1, 1, 1, 1, 1, # N2
+ 1, 1, 1, 1, 1, 1, 1,$N2,$N2,$N2,$N2,$N2, 1, 1, 1, 1, # N3
+ 1, 1, 1, 1, 1, 1, 1,$N3,$N3,$N3,$N3,$N3, 1, 1, 1, 1, # N4
+ 1, 1, 1, 1, 1, 1, 1,$N4,$N4,$N4,$N4,$N4, 1, 1, 1, 1, # N5
+
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,$N2, 1, 1, 1, 1, # N6
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,$N3,$N3, 1, 1, 1, 1, # N7
+ 1, 1, 1, 1, 1, 1, 1, 1, 1,$N4,$N4,$N4, 1, 1, 1, 1, # N8
+ 1, 1, 1, 1, 1, 1, 1, 1,$N5,$N5,$N5,$N5, 1, 1, 1, 1, # N9
+ );
+ output_table(\@perl_extended_utf8_dfa, "PL_extended_utf8_dfa_tab",
+ $NUM_CLASSES);
+ }
+
+ {
+ # This generates the dfa table for strict UTF-8, which rejects
+ # surrogates, non-characters, and above Unicode.
+ #
+ # The classes are
+ # 00-9F 0 Always legal at start
+ # A0 10 Not legal immediately after start bytes F0 F8
+ # A1 11 Not legal immediately after start bytes F0 F8
+ # A2-A7 12 Not legal immediately after start bytes F0 F8 F9
+ # A8,AA,AC 13 Not legal immediately after start bytes F0 F9
+ # A9,AB,AD 14 Not legal immediately after start bytes F0 F9
+ # AE 15 Not legal immediately after start bytes F0 F9
+ # AF 16 Not legal immediately after start bytes F0 F9
+ # B[0248AC] 17 Not legal immediately after start byte F9
+ # B[1359BD] 18 Not legal immediately after start byte F9
+ # B6 19 Not legal immediately after start bytes F1 F9
+ # B7 20 Not legal immediately after start bytes F1 F9
+ # BE 21 Not legal immediately after start byte F9
+ # BF 22 Not legal immediately after start byte F9
+ # C0-C4 1 (reject, all are overlong)
+ # C5-DF 2 Accepts any legal continuation
+ # E0 1 (reject, all are overlong)
+ # E1-EF 3 Accepts any legal continuation
+ # F0 8 (has overlongs)
+ # F1 6 (has surrogates, non-chars)
+ # F2,F4,F6 4 Accepts any legal continuation
+ # F3,F5,F7 5 (has non-chars)
+ # F8 9 (has overlongs, non-chars)
+ # F9 7 (has non-chars, non-Unicode)
+ # FA-FF 1 (reject, all are non-Unicode)
+ #
+ # Here's the I8 for enough code points so that you can figure out what's
+ # going on:
+ #
+ # U+D800: \xF1\xB6\xA0\xA0
+ # U+DFFF: \xF1\xB7\xBF\xBF
+ # U+FDD0: \xF1\xBF\xAE\xB0
+ # U+FDEF: \xF1\xBF\xAF\xAF
+ # U+FFFE: \xF1\xBF\xBF\xBE
+ # U+1FFFE: \xF3\xBF\xBF\xBE
+ # U+2FFFE: \xF5\xBF\xBF\xBE
+ # U+3FFFE: \xF7\xBF\xBF\xBE
+ # U+4FFFE: \xF8\xA9\xBF\xBF\xBE
+ # U+5FFFE: \xF8\xAB\xBF\xBF\xBE
+ # U+6FFFE: \xF8\xAD\xBF\xBF\xBE
+ # U+7FFFE: \xF8\xAF\xBF\xBF\xBE
+ # U+8FFFE: \xF8\xB1\xBF\xBF\xBE
+ # U+9FFFE: \xF8\xB3\xBF\xBF\xBE
+ # U+AFFFE: \xF8\xB5\xBF\xBF\xBE
+ # U+BFFFE: \xF8\xB7\xBF\xBF\xBE
+ # U+CFFFE: \xF8\xB9\xBF\xBF\xBE
+ # U+DFFFE: \xF8\xBB\xBF\xBF\xBE
+ # U+EFFFE: \xF8\xBD\xBF\xBF\xBE
+ # U+FFFFE: \xF8\xBF\xBF\xBF\xBE
+ # U+10FFFE: \xF9\xA1\xBF\xBF\xBE
+ #
+ # The first part of the table maps bytes to character classes to reduce
+ # the size of the transition table and create bitmasks.
+ #
+ # The second part is a transition table that maps a combination of a
+ # state of the automaton and a character class to a new state. The
+ # numbering of the original nodes is retained, but some have been split
+ # so that there are new nodes. They mean:
+ # N0 The initial state, and final accepting one.
+ # N1 One continuation byte (A0-BF) left. This is transitioned to
+ # immediately when the start byte indicates a two-byte sequence
+ # N2 Two continuation bytes left.
+ # N3 Three continuation bytes left.
+ # N4 Start byte is F0. Continuation bytes A[0-F] are illegal
+ # (overlong); the other continuations transition to N2
+ # N5 Start byte is F1. Continuation bytes B6 and B7 are illegal
+ # (surrogates); BF transitions to N9; the other continuations to
+ # N2
+ # N6 Start byte is F[357]. Continuation byte BF transitions to N12;
+ # other continuations to N2
+ # N7 Start byte is F8. Continuation bytes A[0-7] are illegal
+ # (overlong); continuations A[9BDF] and B[13579BDF] transition to
+ # N14; the other continuations to N3
+ # N8 Start byte is F9. Continuation byte A0 transitions to N3; A1
+ # to N14; the other continuation bytes are illegal.
+ # N9 Initial sequence is F1 BF. Continuation byte AE transitions to
+ # state N10; AF to N11; BF to N13; the other continuations to N1.
+ # N10 Initial sequence is F1 BF AE. Continuation bytes B0-BF are
+ # illegal (non-chars); the other continuations are legal
+ # N11 Initial sequence is F1 BF AF. Continuation bytes A0-AF are
+ # illegal (non-chars); the other continuations are legal
+ # N12 Initial sequence is F[357] BF; or an N14 sequence followed by
+ # BF (F8 A[9BDF] BF; F8 B[13579BDF] BF; F9 A1 BF). Two
+ # continuation bytes are left. Continuation byte BF transitions
+ # to N13; the other continuations to N1
+ # N13 Initial sequence is F[1357] BF BF; or an N14 sequence followed
+ # by BF BF. One continuation byte is left. Continuation bytes BE
+ # and BF are illegal (non-chars); the other continuations are
+ # legal
+ # N14 Initial sequence is F8 A[9BDF]; or F8 B[13579BDF]; or F9 A1.
+ # Three continuation bytes are left. Continuation byte BF
+ # transitions to N12 (two more bytes follow that BF, so it cannot
+ # go straight to N13); the other continuations to N2
+ # 1 Reject. All transitions not mentioned above (except the single
+ # byte ones, as they are always legal) are to this state.
+
+ # Each state's value is the offset of its row within the transition
+ # table, i.e. a multiple of $NUM_CLASSES; the value 1 is never such an
+ # offset, which is why it can stand for "reject".
+ my $NUM_CLASSES = 23;
+ my $N0 = 0;
+ my $N1 = $N0 + $NUM_CLASSES;
+ my $N2 = $N1 + $NUM_CLASSES;
+ my $N3 = $N2 + $NUM_CLASSES;
+ my $N4 = $N3 + $NUM_CLASSES;
+ my $N5 = $N4 + $NUM_CLASSES;
+ my $N6 = $N5 + $NUM_CLASSES;
+ my $N7 = $N6 + $NUM_CLASSES;
+ my $N8 = $N7 + $NUM_CLASSES;
+ my $N9 = $N8 + $NUM_CLASSES;
+ my $N10 = $N9 + $NUM_CLASSES;
+ my $N11 = $N10 + $NUM_CLASSES;
+ my $N12 = $N11 + $NUM_CLASSES;
+ my $N13 = $N12 + $NUM_CLASSES;
+ my $N14 = $N13 + $NUM_CLASSES;
+
+ my @strict_utf8_dfa;
+
+ # Character class of each byte, indexed by its I8 value
+ my @i8 = (
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 00-0F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 10-1F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 20-2F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 30-3F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 40-4F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 50-5F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 60-6F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 70-7F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 80-8F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 90-9F
+ 10,11,12,12,12,12,12,12,13,14,13,14,13,14,15,16, # A0-AF
+ 17,18,17,18,17,18,19,20,17,18,17,18,17,18,21,22, # B0-BF
+ 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, # C0-CF
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, # D0-DF
+ 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, # E0-EF
+ 8, 6, 4, 5, 4, 5, 4, 5, 9, 7, 1, 1, 1, 1, 1, 1, # F0-FF
+ );
+
+ # Store each class at the position given by @i82utf for that I8 byte.
+ # NOTE(review): @i82utf is defined outside this block; presumably it
+ # maps an I8 byte to the corresponding native byte -- confirm.
+ $strict_utf8_dfa[$i82utf[$_]] = $i8[$_] for (0 .. 255);
+
+ # The transition rows follow the class map, one row per state
+ push @strict_utf8_dfa, (
+ # Class:
+ # 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
+ 0,1,$N1,$N2,$N3,$N6,$N5,$N8,$N4,$N7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # N0
+ 1,1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # N1
+ 1,1, 1, 1, 1, 1, 1, 1, 1, 1, $N1, $N1, $N1, $N1, $N1, $N1, $N1, $N1, $N1, $N1, $N1, $N1, $N1, # N2
+ 1,1, 1, 1, 1, 1, 1, 1, 1, 1, $N2, $N2, $N2, $N2, $N2, $N2, $N2, $N2, $N2, $N2, $N2, $N2, $N2, # N3
+
+ 1,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, $N2, $N2, $N2, $N2, $N2, $N2, # N4
+ 1,1, 1, 1, 1, 1, 1, 1, 1, 1, $N2, $N2, $N2, $N2, $N2, $N2, $N2, $N2, $N2, 1, 1, $N2, $N9, # N5
+ 1,1, 1, 1, 1, 1, 1, 1, 1, 1, $N2, $N2, $N2, $N2, $N2, $N2, $N2, $N2, $N2, $N2, $N2, $N2,$N12, # N6
+ 1,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, $N3,$N14, $N3,$N14, $N3,$N14, $N3,$N14, $N3,$N14, # N7
+ 1,1, 1, 1, 1, 1, 1, 1, 1, 1, $N3,$N14, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # N8
+ 1,1, 1, 1, 1, 1, 1, 1, 1, 1, $N1, $N1, $N1, $N1, $N1,$N10,$N11, $N1, $N1, $N1, $N1, $N1,$N13, # N9
+ 1,1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, # N10
+ 1,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, # N11
+ 1,1, 1, 1, 1, 1, 1, 1, 1, 1, $N1, $N1, $N1, $N1, $N1, $N1, $N1, $N1, $N1, $N1, $N1, $N1,$N13, # N12
+ 1,1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, # N13
+ 1,1, 1, 1, 1, 1, 1, 1, 1, 1, $N2, $N2, $N2, $N2, $N2, $N2, $N2, $N2, $N2, $N2, $N2, $N2,$N12, # N14
+ );
+ output_table(\@strict_utf8_dfa, "PL_strict_utf8_dfa_tab", $NUM_CLASSES);
+ }
+
+ {
+ # Build and emit the dfa table for "C9 strict" UTF-8. This flavor
+ # rejects surrogates and anything above Unicode, but lets
+ # non-characters through.
+ #
+ # Byte classes (bytes are given as I8):
+ # 00-9F 0 Always legal at start
+ # A0-A1 9 Not legal immediately after start bytes F0 F8
+ # A2-A7 10 Not legal immediately after start bytes F0 F8 F9
+ # A8-AF 11 Not legal immediately after start bytes F0 F9
+ # B0-B5,B8-BF 12 Not legal immediately after start byte F9
+ # B6,B7 13 Not legal immediately after start bytes F1 F9
+ # C0-C4 1 (reject, all are overlong)
+ # C5-DF 2 Accepts any legal continuation
+ # E0 1 (reject, all are overlong)
+ # E1-EF 3 Accepts any legal continuation
+ # F0 6 (has overlongs)
+ # F1 5 (has surrogates)
+ # F2-F7 4 Accepts any legal continuation
+ # F8 8 (has overlongs)
+ # F9 7 (has non-Unicode)
+ # FA-FF 1 (reject, all are non-Unicode)
+ #
+ # The generated table has two parts. The first maps each byte to its
+ # class, which keeps the transition table small. The second is the
+ # transition table proper: one row per state, one column per class,
+ # each entry being the next state. The states are:
+ # N0 Initial state, and the final accepting one.
+ # N1 One continuation byte (A0-BF) still expected; entered directly
+ # from the start byte of a two-byte sequence
+ # N2 Two continuation bytes still expected.
+ # N3 Three continuation bytes still expected.
+ # N4 Start byte was F0. Continuations A[0-F] are illegal (overlong);
+ # any other continuation goes to N2
+ # N5 Start byte was F1. B6 and B7 are illegal (surrogates); any
+ # other continuation goes to N2
+ # N6 Start byte was F8. Continuations A[0-7] are illegal (overlong);
+ # any other continuation goes to N3
+ # N7 Start byte was F9. Continuations A0 and A1 go to N3; every
+ # other continuation is illegal (non-Unicode)
+ # 1 Reject. Every transition not listed above, other than the
+ # always-legal single-byte ones, lands here.
+
+ my $NUM_CLASSES = 14;
+
+ # A state's value is the offset of its row in the transition table
+ my ($N0, $N1, $N2, $N3, $N4, $N5, $N6, $N7)
+     = map { $_ * $NUM_CLASSES } 0 .. 7;
+
+ # Class of every byte, indexed by I8 value
+ my @class_of_i8 = (
+     (0)  x 0xA0,    # 00-9F
+     (9)  x 2,       # A0-A1
+     (10) x 6,       # A2-A7
+     (11) x 8,       # A8-AF
+     (12) x 6,       # B0-B5
+     (13) x 2,       # B6-B7
+     (12) x 8,       # B8-BF
+     (1)  x 5,       # C0-C4
+     (2)  x 27,      # C5-DF
+     1,              # E0
+     (3)  x 15,      # E1-EF
+     6,              # F0
+     5,              # F1
+     (4)  x 6,       # F2-F7
+     8,              # F8
+     7,              # F9
+     (1)  x 6,       # FA-FF
+ );
+
+ # First part of the table: the class of each byte, stored at the
+ # position @i82utf gives for that I8 byte
+ my @c9_dfa;
+ for my $i8_byte (0 .. $#class_of_i8) {
+     $c9_dfa[ $i82utf[$i8_byte] ] = $class_of_i8[$i8_byte];
+ }
+
+ # Second part: the transition rows. Classes 0-8 are the non-continuation
+ # classes, 9-13 the continuation classes.
+ push @c9_dfa,
+     0, 1, $N1, $N2, $N3, $N5, $N4, $N7, $N6, (1) x 5,    # N0
+     (1) x 9,  (0) x 5,                                   # N1
+     (1) x 9,  ($N1) x 5,                                 # N2
+     (1) x 9,  ($N2) x 5,                                 # N3
+     (1) x 12, ($N2) x 2,                                 # N4
+     (1) x 9,  ($N2) x 4, 1,                              # N5
+     (1) x 11, ($N3) x 3,                                 # N6
+     (1) x 9,  $N3, (1) x 4;                              # N7
+
+ output_table(\@c9_dfa, "PL_c9_utf8_dfa_tab", $NUM_CLASSES);
+ }
+