This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Upgrade to Encode 2.0902
[perl5.git] / ext / Encode / Encode / encode.h
CommitLineData
67d7b5ef
JH
1#ifndef ENCODE_H
2#define ENCODE_H
3
4#ifndef U8
85982a32
JH
5/*
6 A tad devious this:
7 perl normally has a #define for U8 - if that isn't present we typedef
8 it instead. A typedef is invisible to the preprocessor, so the later
67d7b5ef 9 "#ifdef U8" is false and data-only users get no extern references to the code parts
85982a32 10*/
67d7b5ef
JH
11typedef unsigned char U8;
12#endif
13
14typedef struct encpage_s encpage_t;
15
67d7b5ef
JH
16struct encpage_s
17{
85982a32
JH
18 /* One slot of a translation page (pages are arrays of these); fields ordered to pack nicely on 32-bit machines */
19 const U8 *seq; /* Packed output sequences we generate
20 if we match: dlen octets per source octet, found at seq + dlen*(octet-min) */
21 encpage_t *next; /* Page to go to if we match, to continue the translation */
22 U8 min; /* Min value of octet to match this entry */
23 U8 max; /* Max value of octet to match this entry; NOTE(review): looks inclusive, since the last entry must match up to 0xFF - confirm */
24 U8 dlen; /* destination length -
25 size of entries in seq; may be zero if the output is not known yet */
26 U8 slen; /* source length -
27 number of source octets needed; 0 means the character cannot be represented, MSB set marks an approximate "FALLBACK" entry */
67d7b5ef
JH
28};
29
30/*
85982a32
JH
31 At any point in a translation there is a page pointer which points
32 at an array of the above structures.
33
34 Basic operation :
35 get octet from source stream.
36 if (octet >= min && octet <= max) {
37 if slen is 0 then we cannot represent this character.
38 if we have less than slen octets (including this one) then
39 we have a partial character.
40 otherwise
41 copy dlen octets from seq + dlen*(octet-min) to output
42 (dlen may be zero if we don't know yet.)
43 load page pointer with next to continue.
44 (if slen is one this is end of a character)
45 get next octet.
46 }
47 else {
48 increment the page pointer to look at next slot in the array
49 }
50
51 arrays SHALL be constructed so there is an entry which matches
52 ..0xFF at the end, and either maps it or indicates no
53 representation.
54
55 if MSB of slen is set then mapping is an approximate "FALLBACK" entry.
67d7b5ef
JH
56
57*/
58
59
60typedef struct encode_s encode_t; /* Descriptor for one encoding; the type taken by Encode_DefineEncoding() */
61struct encode_s
62{
85982a32
JH
63 encpage_t *t_utf8; /* Starting table (page) for translation from
64 the encoding to UTF-8 form */
65 encpage_t *f_utf8; /* Starting table (page) for translation
66 from UTF-8 to the encoding */
67 const U8 *rep; /* Replacement character in this encoding
68 e.g. "?" */
69 int replen; /* Number of octets in rep */
70 U8 min_el; /* Minimum octets to represent a character */
71 U8 max_el; /* Maximum octets to represent a character */
72 const char *name[2]; /* name(s) of this encoding; NOTE(review): only two slots - confirm whether the list is NULL-terminated */
67d7b5ef
JH
73};
74
75#ifdef U8
76/* See comment at top of file for deviousness */
77
/*
 * Entry points implemented outside this header.
 *
 * NOTE(review): the parameter notes for do_encode() below are inferred from
 * the names and from the ENCODE_* codes in this header - confirm against
 * the implementation before relying on them.
 *  - enc is the starting translation page; src/dst are the input and
 *    output octet buffers.
 *  - slen and dout are pointers, so presumably in/out counts of source
 *    octets consumed and destination octets produced; dlen is presumably
 *    the capacity of dst.
 *  - approx presumably allows the approximate "FALLBACK" entries to match.
 *  - term/tlen presumably describe a terminator sequence (cf. ENCODE_FOUND_TERM).
 *  - the int result is presumably one of the ENCODE_* status codes.
 */
78extern int do_encode(encpage_t *enc, const U8 *src, STRLEN *slen,
220e2d4e
IH
79 U8 *dst, STRLEN dlen, STRLEN *dout, int approx,
80 const U8 *term, STRLEN tlen);
67d7b5ef
JH
81
82extern void Encode_DefineEncoding(encode_t *enc);
83
85982a32 84#endif /* U8 */
67d7b5ef
JH
85
/* NOTE(review): small integer status codes, presumably the non-zero results
   of do_encode(); no zero/success code is defined in this header - confirm. */
86#define ENCODE_NOSPACE 1
87#define ENCODE_PARTIAL 2
88#define ENCODE_NOREP 3
89#define ENCODE_FALLBACK 4
220e2d4e 90#define ENCODE_FOUND_TERM 5
85982a32
JH
91
92#define FBCHAR_UTF8 "\xEF\xBF\xBD" /* UTF-8 encoding of U+FFFD REPLACEMENT CHARACTER */
93
/* Check flags: each is a distinct bit, so they can be OR-ed together
   (as the ENCODE_FB_* presets further down do). */
94#define ENCODE_DIE_ON_ERR 0x0001 /* croaks immediately */
95#define ENCODE_WARN_ON_ERR 0x0002 /* warn on error; may proceed */
96#define ENCODE_RETURN_ON_ERR 0x0004 /* immediately returns on NOREP */
97#define ENCODE_LEAVE_SRC 0x0008 /* if set, $src is left as is; otherwise $src is updated */
98#define ENCODE_PERLQQ 0x0100 /* perlqq fallback string */
af1f55d9
JH
99#define ENCODE_HTMLCREF 0x0200 /* HTML character ref. fb mode */
100#define ENCODE_XMLCREF 0x0400 /* XML character ref. fb mode */
85982a32
JH
101
102#define ENCODE_FB_DEFAULT 0x0000 /* no check flags set */
103#define ENCODE_FB_CROAK 0x0001 /* same value as ENCODE_DIE_ON_ERR */
104#define ENCODE_FB_QUIET ENCODE_RETURN_ON_ERR /* return on error, no warning bit */
105#define ENCODE_FB_WARN (ENCODE_RETURN_ON_ERR|ENCODE_WARN_ON_ERR) /* warn and return on error */
7f0d54d7
RGS
106#define ENCODE_FB_PERLQQ (ENCODE_PERLQQ|ENCODE_LEAVE_SRC) /* perlqq fallback, $src left alone */
107#define ENCODE_FB_HTMLCREF (ENCODE_HTMLCREF|ENCODE_LEAVE_SRC) /* HTML char-ref fallback, $src left alone */
108#define ENCODE_FB_XMLCREF (ENCODE_XMLCREF|ENCODE_LEAVE_SRC) /* XML char-ref fallback, $src left alone */
85982a32
JH
109
110#endif /* ENCODE_H */