*/
STATIC Size_t
-S_do_trans_simple(pTHX_ SV * const sv)
+S_do_trans_simple(pTHX_ SV * const sv, const OPtrans_map * const tbl)
{
Size_t matches = 0;
STRLEN len;
U8 *s = (U8*)SvPV_nomg(sv,len);
U8 * const send = s+len;
- const OPtrans_map * const tbl = (OPtrans_map*)cPVOP->op_pv;
PERL_ARGS_ASSERT_DO_TRANS_SIMPLE;
- if (!tbl)
- Perl_croak(aTHX_ "panic: do_trans_simple line %d",__LINE__);
-
/* First, take care of non-UTF-8 input strings, because they're easy */
if (!SvUTF8(sv)) {
while (s < send) {
*/
STATIC Size_t
-S_do_trans_count(pTHX_ SV * const sv)
+S_do_trans_count(pTHX_ SV * const sv, const OPtrans_map * const tbl)
{
STRLEN len;
const U8 *s = (const U8*)SvPV_nomg_const(sv, len);
const U8 * const send = s + len;
Size_t matches = 0;
- const OPtrans_map * const tbl = (OPtrans_map*)cPVOP->op_pv;
PERL_ARGS_ASSERT_DO_TRANS_COUNT;
- if (!tbl)
- Perl_croak(aTHX_ "panic: do_trans_count line %d",__LINE__);
-
if (!SvUTF8(sv)) {
while (s < send) {
if (tbl->map[*s++] >= 0)
*/
STATIC Size_t
-S_do_trans_complex(pTHX_ SV * const sv)
+S_do_trans_complex(pTHX_ SV * const sv, const OPtrans_map * const tbl)
{
STRLEN len;
U8 *s = (U8*)SvPV_nomg(sv, len);
U8 * const send = s+len;
Size_t matches = 0;
- const OPtrans_map * const tbl = (OPtrans_map*)cPVOP->op_pv;
PERL_ARGS_ASSERT_DO_TRANS_COMPLEX;
- if (!tbl)
- Perl_croak(aTHX_ "panic: do_trans_complex line %d",__LINE__);
-
if (!SvUTF8(sv)) {
U8 *d = s;
U8 * const dstart = d;
if (p != d - 1 || *p != *d)
p = d++;
}
- else if (ch == -1) /* -1 is unmapped character */
- *d++ = *s;
- else if (ch == -2) /* -2 is delete character */
+ else if (ch == (short) TR_UNMAPPED)
+ *d++ = *s;
+ else if (ch == (short) TR_DELETE)
matches++;
s++;
}
matches++;
*d++ = (U8)ch;
}
- else if (ch == -1) /* -1 is unmapped character */
+ else if (ch == (short) TR_UNMAPPED)
*d++ = *s;
- else if (ch == -2) /* -2 is delete character */
+ else if (ch == (short) TR_DELETE)
matches++;
s++;
}
d = s;
dstart = d;
- while (s < send) {
- STRLEN len;
- const UV comp = utf8n_to_uvchr(s, send - s, &len,
- UTF8_ALLOW_DEFAULT);
- UV ch;
- short sch;
+ while (s < send) {
+ STRLEN len;
+ const UV comp = utf8n_to_uvchr(s, send - s, &len,
+ UTF8_ALLOW_DEFAULT);
+ UV ch;
+ short sch;
- sch = tbl->map[comp >= size ? size : comp];
+ sch = tbl->map[comp >= size ? size : comp];
- if (sch >= 0) {
- ch = (UV)sch;
- replace:
- matches++;
- if (LIKELY(!squash || ch != pch)) {
- d = uvchr_to_utf8(d, ch);
- pch = ch;
- }
- s += len;
- continue;
- }
- else if (sch == -1) { /* -1 is unmapped character */
- Move(s, d, len, U8);
- d += len;
- }
- else if (sch == -2) /* -2 is delete character */
- matches++;
- else {
- assert(sch == -3); /* -3 is empty replacement */
- ch = comp;
- goto replace;
+ if (sch >= 0) {
+ ch = (UV)sch;
+ replace:
+ matches++;
+ if (LIKELY(!squash || ch != pch)) {
+ d = uvchr_to_utf8(d, ch);
+ pch = ch;
}
+ s += len;
+ continue;
+ }
+ else if (sch == (short) TR_UNMAPPED) {
+ Move(s, d, len, U8);
+ d += len;
+ }
+ else if (sch == (short) TR_DELETE)
+ matches++;
+ else {
+ assert(sch == (short) TR_R_EMPTY); /* empty replacement */
+ ch = comp;
+ goto replace;
+ }
- s += len;
- pch = 0xfeedface;
- }
+ s += len;
+ pch = 0xfeedface;
+ }
if (grows) {
sv_setpvn(sv, (char*)dstart, d - dstart);
UV puv = 0xfeedface;
while (s < send) {
UV uv = swash_fetch(rv, s, TRUE);
-
+
if (d > dend) {
const STRLEN clen = d - dstart;
const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES;
* we must also rely on it to choose the readonly strategy.
*/
if (flags & OPpTRANS_IDENTICAL) {
- return hasutf ? do_trans_count_utf8(sv) : do_trans_count(sv);
+ return hasutf ? do_trans_count_utf8(sv) : do_trans_count(sv, (OPtrans_map*)cPVOP->op_pv);
} else if (flags & (OPpTRANS_SQUASH|OPpTRANS_DELETE|OPpTRANS_COMPLEMENT)) {
- return hasutf ? do_trans_complex_utf8(sv) : do_trans_complex(sv);
+ return hasutf ? do_trans_complex_utf8(sv) : do_trans_complex(sv, (OPtrans_map*)cPVOP->op_pv);
} else {
- return hasutf ? do_trans_simple_utf8(sv) : do_trans_simple(sv);
+ return hasutf ? do_trans_simple_utf8(sv) : do_trans_simple(sv, (OPtrans_map*)cPVOP->op_pv);
}
}
if (!s) {
s = (unsigned char *)"";
}
-
+
PERL_ARGS_ASSERT_DO_VECGET;
if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
Perl_croak(aTHX_ "Illegal number of bits in vec");
if (SvUTF8(sv)) {
- if (Perl_sv_utf8_downgrade(aTHX_ sv, TRUE)) {
+ if (Perl_sv_utf8_downgrade_flags(aTHX_ sv, TRUE, 0)) {
/* PVX may have changed */
s = (unsigned char *) SvPV_flags(sv, srclen, svpv_flags);
}
else {
- Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
- "Use of strings with code points over 0xFF as"
- " arguments to vec is deprecated. This will"
- " be a fatal error in Perl 5.32");
+ Perl_croak(aTHX_ "Use of strings with code points over 0xFF as arguments to vec is forbidden");
}
}
SV_GMAGIC | SV_UNDEF_RETURNS_NULL);
if (SvUTF8(targ)) {
/* This is handled by the SvPOK_only below...
- if (!Perl_sv_utf8_downgrade(aTHX_ targ, TRUE))
+ if (!Perl_sv_utf8_downgrade_flags(aTHX_ targ, TRUE, 0))
SvUTF8_off(targ);
*/
- (void) Perl_sv_utf8_downgrade(aTHX_ targ, TRUE);
+ (void) Perl_sv_utf8_downgrade_flags(aTHX_ targ, TRUE, 0);
}
(void)SvPOK_only(targ);
void
Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
{
-#ifdef LIBERAL
long *dl;
long *ll;
long *rl;
-#endif
char *dc;
STRLEN leftlen;
STRLEN rightlen;
const char *lc;
const char *rc;
- STRLEN len;
+ STRLEN len = 0;
STRLEN lensave;
const char *lsave;
const char *rsave;
* on zeros without having to do it. In the case of '&', the result is
* zero, and the dangling portion is simply discarded. For '|' and '^', the
* result is the same as the other operand, so the dangling part is just
- * appended to the final result, unchanged. We currently accept above-FF
- * code points in the dangling portion, as that's how it has long worked,
- * and code depends on it staying that way. But it is now fatal for
- * above-FF to appear in the portion that does get operated on. Hence, any
- * above-FF must come only in the longer operand, and only in its dangling
- * portion. That means that at least one of the operands has to be
- * entirely non-UTF-8, and the length of that operand has to be before the
- * first above-FF in the other */
+ * appended to the final result, unchanged. As of perl-5.32, we no longer
+ * accept above-FF code points in the dangling portion.
+ */
if (left_utf8 || right_utf8) {
- if (left_utf8) {
- if (right_utf8 || rightlen > leftlen) {
- Perl_croak(aTHX_ fatal_above_ff_msg, PL_op_desc[optype]);
- }
- len = rightlen;
- }
- else if (right_utf8) {
- if (leftlen > rightlen) {
- Perl_croak(aTHX_ fatal_above_ff_msg, PL_op_desc[optype]);
- }
- len = leftlen;
- }
-
- Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
- deprecated_above_ff_msg, PL_op_desc[optype]);
+ Perl_croak(aTHX_ FATAL_ABOVE_FF_MSG, PL_op_desc[optype]);
}
else { /* Neither is UTF-8 */
len = MIN(leftlen, rightlen);
dc = SvPVX(sv); /* sv_usepvn() calls Renew() */
}
-#ifdef LIBERAL
if (len >= sizeof(long)*4 &&
- !((unsigned long)dc % sizeof(long)) &&
- !((unsigned long)lc % sizeof(long)) &&
- !((unsigned long)rc % sizeof(long))) /* It's almost always aligned... */
+ !(PTR2nat(dc) % sizeof(long)) &&
+ !(PTR2nat(lc) % sizeof(long)) &&
+ !(PTR2nat(rc) % sizeof(long))) /* It's almost always aligned... */
{
const STRLEN remainder = len % (sizeof(long)*4);
len /= (sizeof(long)*4);
len = remainder;
}
-#endif
+
switch (optype) {
case OP_BIT_AND:
while (len--)
len = lensave;
if (rightlen > len) {
if (dc == rc)
- SvCUR(sv) = rightlen;
+ SvCUR_set(sv, rightlen);
else
sv_catpvn_nomg(sv, rsave + len, rightlen - len);
}
else if (leftlen > len) {
if (dc == lc)
- SvCUR(sv) = leftlen;
+ SvCUR_set(sv, leftlen);
else
sv_catpvn_nomg(sv, lsave + len, leftlen - len);
}