Commit | Line | Data |
---|---|---|
67d7b5ef JH |
1 | #ifndef ENCODE_H |
2 | #define ENCODE_H | |
3 | ||
4 | #ifndef U8 | |
5 | /* A slightly devious trick: | |
6 | perl normally has a #define for U8 - if that isn't present we typedef it | |
7 | instead. A typedef leaves the U8 macro undefined, so the later #ifdef U8 | |
8 | stays false and data-only users avoid extern references to the code parts. | |
9 | */ | |
10 | typedef unsigned char U8; | |
11 | #endif | |
12 | ||
13 | typedef struct encpage_s encpage_t; | |
14 | ||
15 | ||
16 | struct encpage_s /* One slot of a translation page; a page is an array of these slots */ |
17 | { |
18 | /* fields ordered to pack nicely on 32-bit machines */ | |
19 | const U8 *seq; /* Packed output sequences; the entry for an octet starts at seq + dlen*(octet-min) */ | |
20 | encpage_t *next; /* Page to continue with after this entry matches */ | |
21 | U8 min; /* Min value of octet to match this entry */ | |
22 | U8 max; /* Max value of octet to match this entry */ | |
23 | U8 dlen; /* destination length - size in octets of each entry in seq (may be zero if not known yet) */ | |
24 | U8 slen; /* source length - number of source octets needed; 0 means no representation, MSB set marks an approximate "FALLBACK" entry */ | |
25 | }; | |
26 | ||
27 | /* | |
28 | At any point in a translation there is a page pointer which points at an array | |
29 | of the above structures. | |
30 | ||
31 | Basic operation : | |
32 | get octet from source stream. | |
33 | if (octet >= min && octet <= max) { | |
34 | if slen is 0 then we cannot represent this character. | |
35 | if we have less than slen octets (including this one) then we have a partial character. | |
36 | otherwise | |
37 | copy dlen octets from seq + dlen*(octet-min) to output | |
38 | (dlen may be zero if we don't know yet.) | |
39 | load page pointer with next to continue. | |
40 | (if slen is one this is the end of a character) | |
41 | get next octet. | |
42 | } | |
43 | else { | |
44 | increment the page pointer to look at next slot in the array | |
45 | } | |
46 | ||
47 | arrays SHALL be constructed so there is an entry which matches ..0xFF at the end, | |
48 | and either maps it or indicates no representation. | |
49 | ||
50 | if MSB of slen is set then mapping is an approximate "FALLBACK" entry. | |
51 | ||
52 | */ | |
53 | ||
54 | ||
55 | typedef struct encode_s encode_t; | |
56 | struct encode_s /* Describes one encoding: its translation tables, replacement sequence and name(s) */ |
57 | { |
58 | encpage_t *t_utf8; /* Starting page for translation from this encoding to UTF-8 form */ | |
59 | encpage_t *f_utf8; /* Starting page for translation from UTF-8 to this encoding */ | |
60 | const U8 *rep; /* Replacement character expressed in this encoding, e.g. "?" */ | |
61 | int replen; /* Number of octets used to represent the replacement character */ | |
62 | U8 min_el; /* Minimum number of octets needed to represent a character */ | |
63 | U8 max_el; /* Maximum number of octets needed to represent a character */ | |
64 | const char *name[2]; /* name(s) of this encoding; NOTE(review): whether the array is NULL-terminated is not visible here - confirm */ | |
65 | }; | |
66 | ||
67 | #ifdef U8 | |
68 | /* Prototypes are only visible when U8 is a preprocessor macro; see the comment at the top of the file for the deviousness */ | |
69 | ||
70 | extern int do_encode(encpage_t *enc, const U8 *src, STRLEN *slen, | |
71 | U8 *dst, STRLEN dlen, STRLEN *dout, int approx); /* NOTE(review): slen and dout are pointers, presumably in/out source length and output count; approx presumably permits "FALLBACK" entries - confirm against the implementation */ | |
72 | ||
73 | extern void Encode_DefineEncoding(encode_t *enc); /* NOTE(review): presumably registers enc with the Encode module - confirm against the implementation */ | |
74 | ||
75 | #endif | |
76 | ||
77 | #define ENCODE_NOSPACE 1 /* presumably: destination buffer too small - TODO confirm */ | |
78 | #define ENCODE_PARTIAL 2 /* presumably: fewer than slen source octets available (partial character) - TODO confirm */ | |
79 | #define ENCODE_NOREP 3 /* presumably: character has no representation (slen of 0) - TODO confirm */ | |
80 | #define ENCODE_FALLBACK 4 /* presumably: an approximate "FALLBACK" entry was hit - TODO confirm */ | |
81 | #endif |