|NN char * const oregcomp_parse
Es |void|parse_lparen_question_flags|NN RExC_state_t *pRExC_state
Es |regnode*|reg_node |NN RExC_state_t *pRExC_state|U8 op
-Es |UV |reg_recode |const U8 value|NN SV **encp
Es |regnode*|regpiece |NN RExC_state_t *pRExC_state \
|NN I32 *flagp|U32 depth
Es |bool |grok_bslash_N |NN RExC_state_t *pRExC_state \
#define reg(a,b,c,d) S_reg(aTHX_ a,b,c,d)
#define reg2Lanode(a,b,c,d) S_reg2Lanode(aTHX_ a,b,c,d)
#define reg_node(a,b) S_reg_node(aTHX_ a,b)
-#define reg_recode(a,b) S_reg_recode(aTHX_ a,b)
#define reg_scan_name(a,b) S_reg_scan_name(aTHX_ a,b)
#define reg_skipcomment S_reg_skipcomment
#define reganode(a,b,c) S_reganode(aTHX_ a,b,c)
# endif
#endif /* !NO_LOCALE && HAS_SETLOCALE */
-/* Is $^ENCODING set, or are we under the encoding pragma? */
-#define IN_ENCODING UNLIKELY(PL_encoding \
- || (PL_lex_encoding && _get_encoding() != NULL))
-
#include <setjmp.h>
#ifdef I_SYS_PARAM
Perl_croak_no_modify();
}
- if (IN_ENCODING) {
- if (!SvUTF8(sv)) {
- /* XXX, here sv is utf8-ized as a side-effect!
- If encoding.pm is used properly, almost string-generating
- operations, including literal strings, chr(), input data, etc.
- should have been utf8-ized already, right?
- */
- sv_recode_to_utf8(sv, _get_encoding());
- }
- }
-
s = SvPV(sv, len);
if (chomping) {
if (s && len) {
}
rsptr = temp_buffer;
}
- else if (IN_ENCODING) {
- /* RS is 8 bit, encoding.pm is used.
- * Do not recode PL_rs as a side-effect. */
- svrecode = newSVpvn(rsptr, rslen);
- sv_recode_to_utf8(svrecode, _get_encoding());
- rsptr = SvPV_const(svrecode, rslen);
- rs_charlen = sv_len_utf8(svrecode);
- }
else {
/* RS is 8 bit, scalar is utf8. */
temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
little_utf8 = DO_UTF8(little);
if (big_utf8 ^ little_utf8) {
/* One needs to be upgraded. */
- if (little_utf8 && !IN_ENCODING) {
+ if (little_utf8) {
/* Well, maybe instead we might be able to downgrade the small
string? */
char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
temp = little_utf8
? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);
- if (IN_ENCODING) {
- sv_recode_to_utf8(temp, _get_encoding());
- } else {
- sv_utf8_upgrade(temp);
- }
+ sv_utf8_upgrade(temp);
if (little_utf8) {
big = temp;
big_utf8 = TRUE;
STRLEN len;
const U8 *s = (U8*)SvPV_const(argsv, len);
- if (IN_ENCODING && SvPOK(argsv) && !DO_UTF8(argsv)) {
- SV * const tmpsv = sv_2mortal(newSVsv(argsv));
- s = (U8*)sv_recode_to_utf8(tmpsv, _get_encoding());
- len = UTF8SKIP(s); /* Should be well-formed; so this is its length */
- argsv = tmpsv;
- }
-
SETu(DO_UTF8(argsv)
? utf8n_to_uvchr(s, len, 0, UTF8_ALLOW_ANYUV)
: (UV)(*s));
*tmps = '\0';
(void)SvPOK_only(TARG);
- if (IN_ENCODING && !IN_BYTES) {
- sv_recode_to_utf8(TARG, _get_encoding());
- tmps = SvPVX(TARG);
- if (SvCUR(TARG) == 0
- || ! is_utf8_string((U8*)tmps, SvCUR(TARG))
- || UTF8_IS_REPLACEMENT((U8*) tmps, (U8*) tmps + SvCUR(TARG)))
- {
- SvGROW(TARG, 2);
- tmps = SvPVX(TARG);
- SvCUR_set(TARG, 1);
- *tmps++ = (char)value;
- *tmps = '\0';
- SvUTF8_off(TARG);
- }
- }
-
SETTARG;
return NORMAL;
}
if (DO_UTF8(TARG) && !doutf8) {
nsv = sv_newmortal();
SvSetSV(nsv, dstr);
- if (IN_ENCODING)
- sv_recode_to_utf8(nsv, _get_encoding());
- else
- sv_utf8_upgrade(nsv);
+ sv_utf8_upgrade(nsv);
c = SvPV_const(nsv, clen);
doutf8 = TRUE;
}
first = FALSE;
}
else {
- if (IN_ENCODING) {
- if (!nsv) nsv = sv_newmortal();
- sv_copypv(nsv, repl);
- if (!DO_UTF8(nsv)) sv_recode_to_utf8(nsv, _get_encoding());
- sv_catsv(dstr, nsv);
- }
- else sv_catsv(dstr, repl);
+ sv_catsv(dstr, repl);
if (UNLIKELY(SvTAINTED(repl)))
rxtainted |= SUBST_TAINT_REPL;
}
STATIC regnode* S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op);
#define PERL_ARGS_ASSERT_REG_NODE \
assert(pRExC_state)
-STATIC UV S_reg_recode(pTHX_ const U8 value, SV **encp);
-#define PERL_ARGS_ASSERT_REG_RECODE \
- assert(encp)
STATIC SV * S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags);
#define PERL_ARGS_ASSERT_REG_SCAN_NAME \
assert(pRExC_state)
}
-/*
- * reg_recode
- *
- * It returns the code point in utf8 for the value in *encp.
- * value: a code value in the source encoding
- * encp: a pointer to an Encode object
- *
- * If the result from Encode is not a single character,
- * it returns U+FFFD (Replacement character) and sets *encp to NULL.
- */
-STATIC UV
-S_reg_recode(pTHX_ const U8 value, SV **encp)
-{
- STRLEN numlen = 1;
- SV * const sv = newSVpvn_flags((const char *) &value, numlen, SVs_TEMP);
- const char * const s = *encp ? sv_recode_to_utf8(sv, *encp) : SvPVX(sv);
- const STRLEN newlen = SvCUR(sv);
- UV uv = UNICODE_REPLACEMENT;
-
- PERL_ARGS_ASSERT_REG_RECODE;
-
- if (newlen)
- uv = SvUTF8(sv)
- ? utf8n_to_uvchr((U8*)s, newlen, &numlen, UTF8_ALLOW_DEFAULT)
- : *(U8*)s;
-
- if (!newlen || numlen != newlen) {
- uv = UNICODE_REPLACEMENT;
- *encp = NULL;
- }
- return uv;
-}
-
PERL_STATIC_INLINE U8
S_compute_EXACTish(RExC_state_t *pRExC_state)
{
vFAIL(error_msg);
}
ender = result;
- if (IN_ENCODING && ender < 0x100) {
- goto recode_encoding;
- }
if (ender > 0xff) {
REQUIRE_UTF8(flagp);
}
if (RExC_recode_x_to_native) {
ender = LATIN1_TO_NATIVE(ender);
}
- else
#endif
- if (IN_ENCODING) {
- goto recode_encoding;
- }
}
else {
REQUIRE_UTF8(flagp);
form_short_octal_warning(p, numlen));
}
}
- if (IN_ENCODING && ender < 0x100)
- goto recode_encoding;
- break;
- recode_encoding:
- if (! RExC_override_recoding) {
- SV* enc = _get_encoding();
- ender = reg_recode((U8)ender, &enc);
- if (!enc && PASS2)
- ckWARNreg(p, "Invalid escape in the specified encoding");
- REQUIRE_UTF8(flagp);
- }
break;
case '\0':
if (p >= RExC_end)
}
}
non_portable_endpoint++;
- if (IN_ENCODING && value < 0x100) {
- goto recode_encoding;
- }
break;
case 'x':
RExC_parse--; /* function expects to be pointed at the 'x' */
}
}
non_portable_endpoint++;
- if (IN_ENCODING && value < 0x100)
- goto recode_encoding;
break;
case 'c':
value = grok_bslash_c(*RExC_parse++, PASS2);
}
}
non_portable_endpoint++;
- if (IN_ENCODING && value < 0x100)
- goto recode_encoding;
- break;
- }
- recode_encoding:
- if (! RExC_override_recoding) {
- SV* enc = _get_encoding();
- value = reg_recode((U8)value, &enc);
- if (!enc) {
- if (strict) {
- vFAIL("Invalid escape in the specified encoding");
- }
- else if (PASS2) {
- ckWARNreg(RExC_parse,
- "Invalid escape in the specified encoding");
- }
- }
break;
}
default:
S_sv_uncow(aTHX_ sv, 0);
}
- if (IN_ENCODING && !(flags & SV_UTF8_NO_ENCODING)) {
- sv_recode_to_utf8(sv, _get_encoding());
- if (extra) SvGROW(sv, SvCUR(sv) + extra);
- return SvCUR(sv);
- }
-
if (SvCUR(sv) == 0) {
if (extra) SvGROW(sv, extra);
} else { /* Assume Latin-1/EBCDIC */
pv2 = SvPV_flags_const(sv2, cur2, flags);
if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) {
- /* Differing utf8ness.
- * Do not UTF8size the comparands as a side-effect. */
- if (IN_ENCODING) {
- if (SvUTF8(sv1)) {
- svrecode = newSVpvn(pv2, cur2);
- sv_recode_to_utf8(svrecode, _get_encoding());
- pv2 = SvPV_const(svrecode, cur2);
- }
- else {
- svrecode = newSVpvn(pv1, cur1);
- sv_recode_to_utf8(svrecode, _get_encoding());
- pv1 = SvPV_const(svrecode, cur1);
- }
- /* Now both are in UTF-8. */
- if (cur1 != cur2) {
- SvREFCNT_dec_NN(svrecode);
- return FALSE;
- }
- }
- else {
- if (SvUTF8(sv1)) {
+ /* Differing utf8ness. */
+ if (SvUTF8(sv1)) {
/* sv1 is the UTF-8 one */
return bytes_cmp_utf8((const U8*)pv2, cur2,
(const U8*)pv1, cur1) == 0;
- }
- else {
+ }
+ else {
/* sv2 is the UTF-8 one */
return bytes_cmp_utf8((const U8*)pv1, cur1,
(const U8*)pv2, cur2) == 0;
- }
- }
+ }
}
if (cur1 == cur2)
pv2 = SvPV_flags_const(sv2, cur2, flags);
if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) {
- /* Differing utf8ness.
- * Do not UTF8size the comparands as a side-effect. */
+ /* Differing utf8ness. */
if (SvUTF8(sv1)) {
- if (IN_ENCODING) {
- svrecode = newSVpvn(pv2, cur2);
- sv_recode_to_utf8(svrecode, _get_encoding());
- pv2 = SvPV_const(svrecode, cur2);
- }
- else {
const int retval = -bytes_cmp_utf8((const U8*)pv2, cur2,
(const U8*)pv1, cur1);
return retval ? retval < 0 ? -1 : +1 : 0;
- }
}
else {
- if (IN_ENCODING) {
- svrecode = newSVpvn(pv1, cur1);
- sv_recode_to_utf8(svrecode, _get_encoding());
- pv1 = SvPV_const(svrecode, cur1);
- }
- else {
const int retval = bytes_cmp_utf8((const U8*)pv1, cur1,
(const U8*)pv2, cur2);
return retval ? retval < 0 ? -1 : +1 : 0;
- }
}
}
" >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
SvPOK_on(sv);
- if (IN_ENCODING && !has_utf8) {
- sv_recode_to_utf8(sv, _get_encoding());
- if (SvUTF8(sv))
- has_utf8 = TRUE;
- }
if (has_utf8) {
SvUTF8_on(sv);
if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
if (!IN_BYTES) {
if (UTF)
PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
- else if (IN_ENCODING) {
- SV *name;
- dSP;
- ENTER;
- SAVETMPS;
- PUSHMARK(sp);
- XPUSHs(_get_encoding());
- PUTBACK;
- call_method("name", G_SCALAR);
- SPAGAIN;
- name = POPs;
- PUTBACK;
- PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
- Perl_form(aTHX_ ":encoding(%"SVf")",
- SVfARG(name)));
- FREETMPS;
- LEAVE;
- }
}
#endif
PL_rsfp = NULL;
if (!IN_BYTES) {
if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
SvUTF8_on(tmpstr);
- else if (IN_ENCODING)
- sv_recode_to_utf8(tmpstr, _get_encoding());
}
PL_lex_stuff = tmpstr;
pl_yylval.ival = op_type;
I32 termcode; /* terminating char. code */
U8 termstr[UTF8_MAXBYTES]; /* terminating string */
STRLEN termlen; /* length of terminating string */
- int last_off = 0; /* last position for nesting bracket */
line_t herelines;
PERL_ARGS_ASSERT_SCAN_STR;
sv_catpvn(sv, s, termlen);
s += termlen;
for (;;) {
- if (IN_ENCODING && !UTF && !re_reparse) {
- bool cont = TRUE;
-
- while (cont) {
- int offset = s - SvPVX_const(PL_linestr);
- const bool found = sv_cat_decode(sv, _get_encoding(), PL_linestr,
- &offset, (char*)termstr, termlen);
- const char *ns;
- char *svlast;
-
- if (SvIsCOW(PL_linestr)) {
- STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
- STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
- STRLEN last_lop_pos, re_eval_start_pos, s_pos;
- char *buf = SvPVX(PL_linestr);
- bufend_pos = PL_parser->bufend - buf;
- bufptr_pos = PL_parser->bufptr - buf;
- oldbufptr_pos = PL_parser->oldbufptr - buf;
- oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
- linestart_pos = PL_parser->linestart - buf;
- last_uni_pos = PL_parser->last_uni
- ? PL_parser->last_uni - buf
- : 0;
- last_lop_pos = PL_parser->last_lop
- ? PL_parser->last_lop - buf
- : 0;
- re_eval_start_pos =
- PL_parser->lex_shared->re_eval_start ?
- PL_parser->lex_shared->re_eval_start - buf : 0;
- s_pos = s - buf;
-
- sv_force_normal(PL_linestr);
-
- buf = SvPVX(PL_linestr);
- PL_parser->bufend = buf + bufend_pos;
- PL_parser->bufptr = buf + bufptr_pos;
- PL_parser->oldbufptr = buf + oldbufptr_pos;
- PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
- PL_parser->linestart = buf + linestart_pos;
- if (PL_parser->last_uni)
- PL_parser->last_uni = buf + last_uni_pos;
- if (PL_parser->last_lop)
- PL_parser->last_lop = buf + last_lop_pos;
- if (PL_parser->lex_shared->re_eval_start)
- PL_parser->lex_shared->re_eval_start =
- buf + re_eval_start_pos;
- s = buf + s_pos;
- }
- ns = SvPVX_const(PL_linestr) + offset;
- svlast = SvEND(sv) - 1;
-
- for (; s < ns; s++) {
- if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
- COPLINE_INC_WITH_HERELINES;
- }
- if (!found)
- goto read_more_line;
- else {
- /* handle quoted delimiters */
- if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
- const char *t;
- for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
- t--;
- if ((svlast-1 - t) % 2) {
- if (!keep_bracketed_quoted) {
- *(svlast-1) = term;
- *svlast = '\0';
- SvCUR_set(sv, SvCUR(sv) - 1);
- }
- continue;
- }
- }
- if (PL_multi_open == PL_multi_close) {
- cont = FALSE;
- }
- else {
- const char *t;
- char *w;
- for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
- /* At here, all closes are "was quoted" one,
- so we don't check PL_multi_close. */
- if (*t == '\\') {
- if (!keep_bracketed_quoted && *(t+1) == PL_multi_open)
- t++;
- else
- *w++ = *t++;
- }
- else if (*t == PL_multi_open)
- brackets++;
-
- *w = *t;
- }
- if (w < t) {
- *w++ = term;
- *w = '\0';
- SvCUR_set(sv, w - SvPVX_const(sv));
- }
- last_off = w - SvPVX(sv);
- if (--brackets <= 0)
- cont = FALSE;
- }
- }
- }
- if (!keep_delims) {
- SvCUR_set(sv, SvCUR(sv) - 1);
- *SvEND(sv) = '\0';
- }
- break;
- }
-
/* extend sv if need be */
SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
/* set 'to' to the next character in the sv's string */
to[-1] = '\n';
#endif
- read_more_line:
/* if we're out of file, or a read fails, bail and reset the current
line marker so we can report where the unterminated string began
*/
/* at this point, we have successfully read the delimited string */
- if (!IN_ENCODING || UTF || re_reparse) {
-
- if (keep_delims)
+ if (keep_delims)
sv_catpvn(sv, s, termlen);
- s += termlen;
- }
- if (has_utf8 || (IN_ENCODING && !re_reparse))
+ s += termlen;
+
+ if (has_utf8)
SvUTF8_on(sv);
PL_multi_end = CopLINE(PL_curcop);
if (!IN_BYTES) {
if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
SvUTF8_on(stuff);
- else if (IN_ENCODING)
- sv_recode_to_utf8(stuff, _get_encoding());
}
NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
force_next(THING);