Commit | Line | Data |
---|---|---|
67d7b5ef JH |
1 | #ifndef ENCODE_H |
2 | #define ENCODE_H | |
3 | ||
#ifndef U8
/*
   A tad devious this:
   perl normally has a #define for U8 - if that isn't present then we
   typedef it - leaving it #ifndef so we can do data parts without
   getting extern references to the code parts.

   (A typedef is invisible to the preprocessor, so when this fallback is
   taken the "#ifdef U8" test further down this header is still false and
   the function prototypes it guards are not declared.)
*/
typedef unsigned char U8;
#endif

typedef struct encpage_s encpage_t;

/*
   One slot of a translation page.  A page is an array of these slots;
   each slot covers the range of octet values min..max and says what to
   emit and which page to continue with when the current octet falls in
   that range.
*/
struct encpage_s
{
    /* fields ordered to pack nicely on 32-bit machines */
    const U8  *seq;   /* Packed output sequences we generate if we match;
                         the entry for an octet starts at
                         seq + dlen*(octet-min) */
    encpage_t *next;  /* Page to go to if we match */
    U8         min;   /* Min value of octet to match this entry */
    U8         max;   /* Max value of octet to match this entry */
    U8         dlen;  /* destination length -
                         size of entries in seq
                         (may be zero if not known yet) */
    U8         slen;  /* source length -
                         number of source octets needed;
                         0 means the character cannot be represented,
                         MSB set marks an approximate "FALLBACK" entry */
};
29 | ||
30 | /* | |
85982a32 JH |
31 | At any point in a translation there is a page pointer which points |
32 | at an array of the above structures. | |
33 | ||
34 | Basic operation : | |
35 | get octet from source stream. | |
36 | if (octet >= min && octet <= max) { | |
37 | if slen is 0 then we cannot represent this character. | |
38 | if we have less than slen octets (including this one) then | |
39 | we have a partial character. | |
40 | otherwise | |
41 | copy dlen octets from seq + dlen*(octet-min) to output | |
42 | (dlen may be zero if we don't know yet.) | |
43 | load page pointer with next to continue. | |
44 | (if slen is one this is end of a character) | |
45 | get next octet. | |
46 | } | |
47 | else { | |
48 | increment the page pointer to look at next slot in the array | |
49 | } | |
50 | ||
51 | arrays SHALL be constructed so there is an entry which matches | |
52 | ..0xFF at the end, and either maps it or indicates no | |
53 | representation. | |
54 | ||
55 | if MSB of slen is set then mapping is an approximate "FALLBACK" entry. | |
67d7b5ef JH |
56 | |
57 | */ | |
58 | ||
59 | ||
60 | typedef struct encode_s encode_t; | |
61 | struct encode_s | |
62 | { | |
85982a32 JH |
63 | encpage_t *t_utf8; /* Starting table for translation from |
64 | the encoding to UTF-8 form */ | |
65 | encpage_t *f_utf8; /* Starting table for translation | |
66 | from UTF-8 to the encoding */ | |
67 | const U8 *rep; /* Replacement character in this encoding | |
68 | e.g. "?" */ | |
69 | int replen; /* Number of octets in rep */ | |
70 | U8 min_el; /* Minimum octets to represent a character */ | |
71 | U8 max_el; /* Maximum octets to represent a character */ | |
72 | const char *name[2]; /* name(s) of this encoding */ | |
67d7b5ef JH |
73 | }; |
74 | ||
#ifdef U8
/* See comment at top of file for deviousness */

/*
   Prototypes for the code parts.  They are only declared when U8 is a
   preprocessor macro; STRLEN is not declared in this header and must
   already be in scope at that point.

   NOTE(review): the parameter roles below are inferred from names and
   types only - confirm against the implementation:
     enc        starting translation page
     src, slen  source octets and their length (slen is a pointer, so
                presumably updated on return)
     dst, dlen  destination buffer and its capacity
     dout       presumably receives the number of octets written
     approx     presumably permits "FALLBACK" entries
     term, tlen presumably a terminator sequence and its length
   The int result is presumably 0 or one of the ENCODE_* status codes
   defined in this header.
*/
extern int do_encode(encpage_t *enc, const U8 *src, STRLEN *slen,
                     U8 *dst, STRLEN dlen, STRLEN *dout, int approx,
                     const U8 *term, STRLEN tlen);

/* NOTE(review): presumably registers enc with the Encode runtime -
   the implementation is not visible here. */
extern void Encode_DefineEncoding(encode_t *enc);

#endif /* U8 */
67d7b5ef JH |
85 | |
/*
   Status codes.
   NOTE(review): presumably the non-zero results of do_encode(); the
   meaning of each code is taken from its name only - confirm against
   the implementation.
*/
#define ENCODE_NOSPACE      1
#define ENCODE_PARTIAL      2
#define ENCODE_NOREP        3
#define ENCODE_FALLBACK     4
#define ENCODE_FOUND_TERM   5

/* The octets EF BF BD: U+FFFD REPLACEMENT CHARACTER encoded as UTF-8 */
#define FBCHAR_UTF8         "\xEF\xBF\xBD"
93 | ||
/* Individual flag bits; combine with bitwise OR */
#define ENCODE_DIE_ON_ERR     0x0001 /* croaks immediately */
#define ENCODE_WARN_ON_ERR    0x0002 /* warn on error; may proceed */
#define ENCODE_RETURN_ON_ERR  0x0004 /* immediately returns on NOREP */
#define ENCODE_LEAVE_SRC      0x0008 /* $src updated unless set */
#define ENCODE_PERLQQ         0x0100 /* perlqq fallback string */
#define ENCODE_HTMLCREF       0x0200 /* HTML character ref. fb mode */
#define ENCODE_XMLCREF        0x0400 /* XML character ref. fb mode */

/*
   Fallback modes, each spelled in terms of the flag bits above so the
   two sets cannot drift apart.  Compound values are parenthesized so
   they stay intact inside larger expressions.
*/
#define ENCODE_FB_DEFAULT     0x0000
#define ENCODE_FB_CROAK       ENCODE_DIE_ON_ERR
#define ENCODE_FB_QUIET       ENCODE_RETURN_ON_ERR
#define ENCODE_FB_WARN        (ENCODE_RETURN_ON_ERR|ENCODE_WARN_ON_ERR)
#define ENCODE_FB_PERLQQ      (ENCODE_PERLQQ|ENCODE_LEAVE_SRC)
#define ENCODE_FB_HTMLCREF    (ENCODE_HTMLCREF|ENCODE_LEAVE_SRC)
#define ENCODE_FB_XMLCREF     (ENCODE_XMLCREF|ENCODE_LEAVE_SRC)
85982a32 JH |
109 | |
110 | #endif /* ENCODE_H */ |