parser->linestr = flags & LEX_START_COPIED
? SvREFCNT_inc_simple_NN(line)
: newSVpvn_flags(s, len, SvUTF8(line));
- sv_catpvs(parser->linestr, "\n;");
+ sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
} else {
- parser->linestr = newSVpvs("\n;");
+ parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
}
parser->oldoldbufptr =
parser->oldbufptr =
ENTER;
SAVESPTR(PL_warnhook);
PL_warnhook = PERL_WARNHOOK_FATAL;
- utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
+ utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
LEAVE;
}
}
}
else {
assert(p < e -1 );
- *bufptr++ = TWO_BYTE_UTF8_TO_UNI(*p, *(p+1));
+ *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
p += 2;
}
}
bufend = PL_parser->bufend;
}
}
- unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
+ unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
if (retlen == (STRLEN)-1) {
/* malformed UTF-8 */
ENTER;
SAVESPTR(PL_warnhook);
PL_warnhook = PERL_WARNHOOK_FATAL;
- utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
+ utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
LEAVE;
}
return unichar;
S_skipspace2(pTHX_ char *s, SV **svp)
{
char *start;
- const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
const I32 startoff = s - SvPVX(PL_linestr);
PERL_ARGS_ASSERT_SKIPSPACE2;
s = skipspace(s);
- PL_bufptr = SvPVX(PL_linestr) + bufptroff;
if (!PL_madskills || !svp)
return s;
start = SvPVX(PL_linestr) + startoff;
sv_setpvn(sv, buf, len);
(void)SvIOK_on(sv);
SvIV_set(sv, 0);
- av_store(av, (I32)CopLINE(PL_curcop), sv);
+ av_store(av, CopLINE(PL_curcop), sv);
}
}
{
/* If warnings are on, this will print a more detailed analysis of what
* is wrong than the error message below */
- utf8n_to_uvuni(first_bad_char_loc,
+ utf8n_to_uvchr(first_bad_char_loc,
e - ((char *) first_bad_char_loc),
NULL, 0);
}
s++;
} else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
- if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))))) {
+ if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
goto bad_charname;
}
s += 2;
s++;
}
else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
- if (! isCHARNAME_CONT(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s,
- *(s+1)))))
+ if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
{
goto bad_charname;
}
if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
/* If warnings are on, this will print a more detailed analysis of
* what is wrong than the error message below */
- utf8n_to_uvuni(first_bad_char_loc,
+ utf8n_to_uvchr(first_bad_char_loc,
(char *) first_bad_char_loc - str,
NULL, 0);
char *e = d++;
while (e-- > c)
*(e + 1) = *e;
- *c = (char)UTF_TO_NATIVE(0xff);
+ *c = (char) ILLEGAL_UTF8_BYTE;
/* mark the range as done, and continue */
dorange = FALSE;
didrange = TRUE;
#ifdef EBCDIC
if (literal_endpoint == 2 &&
- ((isLOWER(min) && isLOWER(max)) ||
- (isUPPER(min) && isUPPER(max)))) {
- if (isLOWER(min)) {
+ ((isLOWER_A(min) && isLOWER_A(max)) ||
+ (isUPPER_A(min) && isUPPER_A(max)))) {
+ if (isLOWER_A(min)) {
for (i = min; i <= max; i++)
- if (isLOWER(i))
- *d++ = NATIVE_TO_NEED(has_utf8,i);
+ if (isLOWER_A(i))
+ *d++ = i;
} else {
for (i = min; i <= max; i++)
- if (isUPPER(i))
- *d++ = NATIVE_TO_NEED(has_utf8,i);
+ if (isUPPER_A(i))
+ *d++ = i;
}
}
else
for (i = min; i <= max; i++)
#ifdef EBCDIC
if (has_utf8) {
- const U8 ch = (U8)NATIVE_TO_UTF(i);
- if (UNI_IS_INVARIANT(ch))
- *d++ = (U8)i;
- else {
- *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
- *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
- }
+ append_utf8_from_native_byte(i, &d);
}
else
#endif
if (uvmax) {
d = (char*)uvchr_to_utf8((U8*)d, 0x100);
if (uvmax > 0x101)
- *d++ = (char)UTF_TO_NATIVE(0xff);
+ *d++ = (char) ILLEGAL_UTF8_BYTE;
if (uvmax > 0x100)
d = (char*)uvchr_to_utf8((U8*)d, uvmax);
}
&& !native_range
#endif
) {
- *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
+ *d++ = (char) ILLEGAL_UTF8_BYTE; /* use illegal utf8 byte--see pmtrans */
s++;
continue;
}
else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
if (s[2] == '#') {
while (s+1 < send && *s != ')')
- *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+ *d++ = *s++;
}
else if (!PL_lex_casemods &&
( s[2] == '{' /* This should match regcomp.c */
else if (*s == '#' && PL_lex_inpat && !in_charclass &&
((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
while (s+1 < send && *s != '\n')
- *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+ *d++ = *s++;
}
/* no further processing of single-quoted regex */
|| s[1] != '{'
|| regcurly(s + 1, FALSE)))
{
- *d++ = NATIVE_TO_NEED(has_utf8,'\\');
+ *d++ = '\\';
goto default_action;
}
{
I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
STRLEN len = 3;
- uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
+ uv = grok_oct(s, &len, &flags, NULL);
s += len;
if (len < 3 && s < send && isDIGIT(*s)
&& ckWARN(WARN_MISC))
* UTF-8 sequence they can end up as, except if they force us
* to recode the rest of the string into utf8 */
- /* Here uv is the ordinal of the next character being added in
- * unicode (converted from native). */
- if (!UNI_IS_INVARIANT(uv)) {
+ /* Here uv is the ordinal of the next character being added */
+ if (!NATIVE_IS_INVARIANT(uv)) {
if (!has_utf8 && uv > 255) {
/* Might need to recode whatever we have accumulated so
* far if it contains any chars variant in utf8 or
}
if (has_utf8) {
- d = (char*)uvuni_to_utf8((U8*)d, uv);
+ d = (char*)uvchr_to_utf8((U8*)d, uv);
if (PL_lex_inwhat == OP_TRANS &&
PL_sublex_info.sub_op) {
PL_sublex_info.sub_op->op_private |=
* now, while preserving the fact that it was a named character
* so that the regex compiler knows this */
- /* This section of code doesn't generally use the
- * NATIVE_TO_NEED() macro to transform the input. I (khw) did
- * a close examination of this macro and determined it is a
- * no-op except on utfebcdic variant characters. Every
- * character generated by this that would normally need to be
- * enclosed by this macro is invariant, so the macro is not
- * needed, and would complicate use of copy(). XXX There are
- * other parts of this file where the macro is used
- * inconsistently, but are saved by it being a no-op */
-
/* The structure of this section of code (besides checking for
* errors and upgrading to utf8) is:
* Further disambiguate between the two meanings of \N, and if
has_utf8 = TRUE;
}
- /* Add the string to the output */
+ /* Add the (Unicode) code point to the output. */
if (UNI_IS_INVARIANT(uv)) {
- *d++ = (char) uv;
+ *d++ = (char) LATIN1_TO_NATIVE(uv);
}
- else d = (char*)uvuni_to_utf8((U8*)d, uv);
+ else {
+ d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
+ }
}
}
else /* Here is \N{NAME} but not \N{U+...}. */
char hex_string[2 * UTF8_MAXBYTES + 5];
/* Get the first character of the result. */
- U32 uv = utf8n_to_uvuni((U8 *) str,
+ U32 uv = utf8n_to_uvchr((U8 *) str,
len,
&char_length,
UTF8_ALLOW_ANYUV);
/* Convert first code point to hex, including
- * the boiler plate before it. For all these,
- * we convert to native format so that
- * downstream code can continue to assume the
- * input is native */
+ * the boiler plate before it. */
output_length =
my_snprintf(hex_string, sizeof(hex_string),
- "\\N{U+%X",
- (unsigned int) UNI_TO_NATIVE(uv));
+ "\\N{U+%X",
+ (unsigned int) uv);
/* Make sure there is enough space to hold it */
d = off + SvGROW(sv, off
* its ordinal in hex */
while ((str += char_length) < str_end) {
const STRLEN off = d - SvPVX_const(sv);
- U32 uv = utf8n_to_uvuni((U8 *) str,
+ U32 uv = utf8n_to_uvchr((U8 *) str,
str_end - str,
&char_length,
UTF8_ALLOW_ANYUV);
output_length =
my_snprintf(hex_string,
- sizeof(hex_string),
- ".%X",
- (unsigned int) UNI_TO_NATIVE(uv));
+ sizeof(hex_string),
+ ".%X",
+ (unsigned int) uv);
d = off + SvGROW(sv, off
+ output_length
/* printf-style backslashes, formfeeds, newlines, etc */
case 'b':
- *d++ = NATIVE_TO_NEED(has_utf8,'\b');
+ *d++ = '\b';
break;
case 'n':
- *d++ = NATIVE_TO_NEED(has_utf8,'\n');
+ *d++ = '\n';
break;
case 'r':
- *d++ = NATIVE_TO_NEED(has_utf8,'\r');
+ *d++ = '\r';
break;
case 'f':
- *d++ = NATIVE_TO_NEED(has_utf8,'\f');
+ *d++ = '\f';
break;
case 't':
- *d++ = NATIVE_TO_NEED(has_utf8,'\t');
+ *d++ = '\t';
break;
case 'e':
- *d++ = ASCII_TO_NEED(has_utf8,'\033');
+ *d++ = ASCII_TO_NATIVE('\033');
break;
case 'a':
- *d++ = ASCII_TO_NEED(has_utf8,'\007');
+ *d++ = '\a';
break;
} /* end switch */
#endif
}
else {
- *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+ *d++ = *s++;
}
} /* while loop to process each character */
s = SKIPSPACE0(s);
}
else {
-/* if (PL_madskills && PL_lex_formbrack) { */
- d = s;
- while (d < PL_bufend && *d != '\n')
- d++;
- if (d < PL_bufend)
- d++;
- else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
+#endif
+ if (PL_madskills) d = s;
+ while (s < PL_bufend && *s != '\n')
+ s++;
+ if (s < PL_bufend)
+ {
+ s++;
+ if (s < PL_bufend)
+ incline(s);
+ }
+ else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
Perl_croak(aTHX_ "panic: input overflow");
+#ifdef PERL_MAD
if (PL_madskills && CopLINE(PL_curcop) >= 1) {
if (!PL_thiswhite)
PL_thiswhite = newSVpvs("");
sv_setpvs(PL_thiswhite, "");
PL_faketokens = 0;
}
- sv_catpvn(PL_thiswhite, s, d - s);
+ sv_catpvn(PL_thiswhite, d, s - d);
}
- s = d;
-/* }
- *s = '\0';
- PL_bufend = s; */
}
-#else
- while (s < PL_bufend && *s != '\n')
- s++;
- if (s < PL_bufend)
- s++;
- else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
- Perl_croak(aTHX_ "panic: input overflow");
#endif
}
goto retry;
case KEY___END__: {
GV *gv;
if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
- const char *pname = "main";
- STRLEN plen = 4;
- U32 putf8 = 0;
- if (PL_tokenbuf[2] == 'D')
- {
- HV * const stash =
- PL_curstash ? PL_curstash : PL_defstash;
- pname = HvNAME_get(stash);
- plen = HvNAMELEN (stash);
- if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
- }
- gv = gv_fetchpvn_flags(
- Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
- plen+6, GV_ADD|putf8, SVt_PVIO
- );
+ HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
+ ? PL_curstash
+ : PL_defstash;
+ gv = (GV *)*hv_fetchs(stash, "DATA", 1);
+ if (!isGV(gv))
+ gv_init(gv,stash,"DATA",4,0);
GvMULTI_on(gv);
if (!GvIO(gv))
GvIOp(gv) = newIO();
/* scan_str
- takes: start position in buffer
- keep_quoted preserve \ on the embedded delimiter(s)
- keep_delims preserve the delimiters around the string
- re_reparse compiling a run-time /(?{})/:
- collapse // to /, and skip encoding src
+ takes:
+ start position in buffer
+ keep_quoted preserve \ on the embedded delimiter(s)
+ keep_delims preserve the delimiters around the string
+ re_reparse compiling a run-time /(?{})/:
+ collapse // to /, and skip encoding src
+ deprecate_escaped_meta issue a deprecation warning for cer-
+ tain paired metacharacters that appear
+ escaped within it
returns: position to continue reading from buffer
side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
updates the read buffer.
STATIC char *
S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
- bool deprecate_escaped_meta /* Should we issue a deprecation warning
- for certain paired metacharacters that
- appear escaped within it */
+ bool deprecate_escaped_meta
)
{
dVAR;
/* Append native character for the rev point */
tmpend = uvchr_to_utf8(tmpbuf, rev);
sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
- if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
+ if (!NATIVE_IS_INVARIANT(rev))
SvUTF8_on(sv);
if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
s = ++pos;