while (t < tend) {
cp[2*i] = utf8n_to_uvchr(t, tend-t, &ulen, flags);
t += ulen;
- /* the toker converts X-Y into (X, ILLEGAL_UTF8_BYTE, Y) */
- if (t < tend && *t == ILLEGAL_UTF8_BYTE) {
+ /* the toker converts X-Y into (X, RANGE_INDICATOR, Y) */
+ if (t < tend && *t == RANGE_INDICATOR) {
t++;
cp[2*i+1] = utf8n_to_uvchr(t, tend-t, &ulen, flags);
t += ulen;
/* Create a utf8 string containing the complement of the
* codepoint ranges. For example if cp[] contains [A,B], [C,D],
* then transv will contain the equivalent of:
- * join '', map chr, 0, ILLEGAL_UTF8_BYTE, A - 1,
- * B + 1, ILLEGAL_UTF8_BYTE, C - 1,
- * D + 1, ILLEGAL_UTF8_BYTE, 0x7fffffff;
- * A range of a single char skips the ILLEGAL_UTF8_BYTE and
+ * join '', map chr, 0, RANGE_INDICATOR, A - 1,
+ * B + 1, RANGE_INDICATOR, C - 1,
+ * D + 1, RANGE_INDICATOR, 0x7fffffff;
+ * A range of a single char skips the RANGE_INDICATOR and
* end cp.
*/
for (j = 0; j < i; j++) {
t = uvchr_to_utf8(tmpbuf,nextmin);
sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf);
if (diff > 1) {
- U8 range_mark = ILLEGAL_UTF8_BYTE;
+ U8 range_mark = RANGE_INDICATOR;
t = uvchr_to_utf8(tmpbuf, val - 1);
sv_catpvn(transv, (char *)&range_mark, 1);
sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf);
t = uvchr_to_utf8(tmpbuf,nextmin);
sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf);
{
- U8 range_mark = ILLEGAL_UTF8_BYTE;
+ U8 range_mark = RANGE_INDICATOR;
sv_catpvn(transv, (char *)&range_mark, 1);
}
t = uvchr_to_utf8(tmpbuf, 0x7fffffff);
if (tfirst > tlast) {
tfirst = (I32)utf8n_to_uvchr(t, tend - t, &ulen, flags);
t += ulen;
- if (t < tend && *t == ILLEGAL_UTF8_BYTE) { /* illegal utf8 val indicates range */
+ if (t < tend && *t == RANGE_INDICATOR) { /* illegal utf8 val indicates range */
t++;
tlast = (I32)utf8n_to_uvchr(t, tend - t, &ulen, flags);
t += ulen;
if (r < rend) {
rfirst = (I32)utf8n_to_uvchr(r, rend - r, &ulen, flags);
r += ulen;
- if (r < rend && *r == ILLEGAL_UTF8_BYTE) { /* illegal utf8 val indicates range */
+ if (r < rend && *r == RANGE_INDICATOR) { /* illegal utf8 val indicates range */
r++;
rlast = (I32)utf8n_to_uvchr(r, rend - r, &ulen, flags);
r += ulen;
* order to make the transliteration a simple table look-up.
* Ranges that extend above Latin1 have to be done differently, so
* there is no advantage to expanding them here, so they are
- * stored here as Min, ILLEGAL_UTF8_BYTE, Max. The illegal byte
- * signifies a hyphen without any possible ambiguity. On EBCDIC
- * machines, if the range is expressed as Unicode, the Latin1
- * portion is expanded out even if the range extends above
- * Latin1. This is because each code point in it has to be
- * processed here individually to get its native translation */
+ * stored here as Min, RANGE_INDICATOR, Max. 'RANGE_INDICATOR' is
+ * a byte that can't occur in legal UTF-8, and hence can signify a
+ * hyphen without any possible ambiguity. On EBCDIC machines, if
+ * the range is expressed as Unicode, the Latin1 portion is
+ * expanded out even if the range extends above Latin1. This is
+ * because each code point in it has to be processed here
+ * individually to get its native translation */
if (! dorange) {
while (e-- > max_ptr) {
*(e + 1) = *e;
}
- *(e + 1) = (char) ILLEGAL_UTF8_BYTE;
+ *(e + 1) = (char) RANGE_INDICATOR;
goto range_done;
}
*d++ = (char) UTF8_TWO_BYTE_LO(0x100);
if (real_range_max > 0x100) {
if (real_range_max > 0x101) {
- *d++ = (char) ILLEGAL_UTF8_BYTE;
+ *d++ = (char) RANGE_INDICATOR;
}
d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
}