X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/82e3a3828dedea0e9162b046b45c56394740d72e..d2b935c5ff6d7cb5675d22583ef8f480e51d7ba4:/doop.c?ds=sidebyside diff --git a/doop.c b/doop.c index 9abfcff..7cc227f 100644 --- a/doop.c +++ b/doop.c @@ -35,14 +35,14 @@ * or may not be utf8. */ -STATIC I32 +STATIC Size_t S_do_trans_simple(pTHX_ SV * const sv) { - I32 matches = 0; + Size_t matches = 0; STRLEN len; U8 *s = (U8*)SvPV_nomg(sv,len); U8 * const send = s+len; - const short * const tbl = (short*)cPVOP->op_pv; + const OPtrans_map * const tbl = (OPtrans_map*)cPVOP->op_pv; PERL_ARGS_ASSERT_DO_TRANS_SIMPLE; @@ -52,7 +52,7 @@ S_do_trans_simple(pTHX_ SV * const sv) /* First, take care of non-UTF-8 input strings, because they're easy */ if (!SvUTF8(sv)) { while (s < send) { - const I32 ch = tbl[*s]; + const short ch = tbl->map[*s]; if (ch >= 0) { matches++; *s = (U8)ch; @@ -62,7 +62,7 @@ S_do_trans_simple(pTHX_ SV * const sv) SvSETMAGIC(sv); } else { - const I32 grows = PL_op->op_private & OPpTRANS_GROWS; + const bool grows = cBOOL(PL_op->op_private & OPpTRANS_GROWS); U8 *d; U8 *dstart; @@ -74,13 +74,13 @@ S_do_trans_simple(pTHX_ SV * const sv) dstart = d; while (s < send) { STRLEN ulen; - I32 ch; + short ch; /* Need to check this, otherwise 128..255 won't match */ const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT); - if (c < 0x100 && (ch = tbl[c]) >= 0) { + if (c < 0x100 && (ch = tbl->map[c]) >= 0) { matches++; - d = uvchr_to_utf8(d, ch); + d = uvchr_to_utf8(d, (UV)ch); s += ulen; } else { /* No match -> copy */ @@ -114,14 +114,14 @@ S_do_trans_simple(pTHX_ SV * const sv) * or may not be utf8. */ -STATIC I32 +STATIC Size_t S_do_trans_count(pTHX_ SV * const sv) { STRLEN len; const U8 *s = (const U8*)SvPV_nomg_const(sv, len); const U8 * const send = s + len; - I32 matches = 0; - const short * const tbl = (short*)cPVOP->op_pv; + Size_t matches = 0; + const OPtrans_map * const tbl = (OPtrans_map*)cPVOP->op_pv; PERL_ARGS_ASSERT_DO_TRANS_COUNT; @@ -130,17 +130,17 @@ S_do_trans_count(pTHX_ SV * const sv) if (!SvUTF8(sv)) { while (s < send) { - if (tbl[*s++] >= 0) + if (tbl->map[*s++] >= 0) matches++; } } else { - const I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT; + const bool complement = cBOOL(PL_op->op_private & OPpTRANS_COMPLEMENT); while (s < send) { STRLEN ulen; const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT); if (c < 0x100) { - if (tbl[c] >= 0) + if (tbl->map[c] >= 0) matches++; } else if (complement) matches++; @@ -159,14 +159,14 @@ S_do_trans_count(pTHX_ SV * const sv) * or may not be utf8. */ -STATIC I32 +STATIC Size_t S_do_trans_complex(pTHX_ SV * const sv) { STRLEN len; U8 *s = (U8*)SvPV_nomg(sv, len); U8 * const send = s+len; - I32 matches = 0; - const short * const tbl = (short*)cPVOP->op_pv; + Size_t matches = 0; + const OPtrans_map * const tbl = (OPtrans_map*)cPVOP->op_pv; PERL_ARGS_ASSERT_DO_TRANS_COMPLEX; @@ -180,7 +180,7 @@ S_do_trans_complex(pTHX_ SV * const sv) if (PL_op->op_private & OPpTRANS_SQUASH) { const U8* p = send; while (s < send) { - const I32 ch = tbl[*s]; + const short ch = tbl->map[*s]; if (ch >= 0) { *d = (U8)ch; matches++; @@ -196,7 +196,7 @@ S_do_trans_complex(pTHX_ SV * const sv) } else { while (s < send) { - const I32 ch = tbl[*s]; + const short ch = tbl->map[*s]; if (ch >= 0) { matches++; *d++ = (U8)ch; @@ -212,104 +212,55 @@ S_do_trans_complex(pTHX_ SV * const sv) SvCUR_set(sv, d - dstart); } else { /* is utf8 */ - const I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT; - const I32 grows = PL_op->op_private & OPpTRANS_GROWS; - const I32 del = PL_op->op_private & OPpTRANS_DELETE; + const bool squash = cBOOL(PL_op->op_private & OPpTRANS_SQUASH); + const bool grows = cBOOL(PL_op->op_private & OPpTRANS_GROWS); U8 *d; U8 *dstart; - STRLEN rlen = 0; + Size_t size = tbl->size; + UV pch = 0xfeedface; if (grows) Newx(d, len*2+1, U8); else d = s; dstart = d; - if (complement && !del) - rlen = tbl[0x100]; - if (PL_op->op_private & OPpTRANS_SQUASH) { - UV pch = 0xfeedface; while (s < send) { STRLEN len; const UV comp = utf8n_to_uvchr(s, send - s, &len, UTF8_ALLOW_DEFAULT); - I32 ch; + UV ch; + short sch; - if (comp > 0xff) { - if (!complement) { - Move(s, d, len, U8); - d += len; - } - else { - /* use the implicit 0x100..0x7fffffff search range */ - matches++; - if (!del) { - ch = (rlen == 0) ? (I32)comp : - (comp - 0x100 < rlen) ? - tbl[comp+1] : tbl[0x100+rlen]; - if ((UV)ch != pch) { - d = uvchr_to_utf8(d, ch); - pch = (UV)ch; - } - s += len; - continue; - } - } - } - else if ((ch = tbl[comp]) >= 0) { + sch = tbl->map[comp >= size ? size : comp]; + + if (sch >= 0) { + ch = (UV)sch; + replace: matches++; - if ((UV)ch != pch) { + if (LIKELY(!squash || ch != pch)) { d = uvchr_to_utf8(d, ch); - pch = (UV)ch; + pch = ch; } s += len; continue; } - else if (ch == -1) { /* -1 is unmapped character */ + else if (sch == -1) { /* -1 is unmapped character */ Move(s, d, len, U8); d += len; } - else if (ch == -2) /* -2 is delete character */ + else if (sch == -2) /* -2 is delete character */ matches++; + else { + assert(sch == -3); /* -3 is empty replacement */ + ch = comp; + goto replace; + } + s += len; pch = 0xfeedface; } - } - else { - while (s < send) { - STRLEN len; - const UV comp = utf8n_to_uvchr(s, send - s, &len, - UTF8_ALLOW_DEFAULT); - I32 ch; - if (comp > 0xff) { - if (!complement) { - Move(s, d, len, U8); - d += len; - } - else { - /* use the implicit 0x100..0x7fffffff search range */ - matches++; - if (!del) { - if (comp - 0x100 < rlen) - d = uvchr_to_utf8(d, tbl[comp+1]); - else - d = uvchr_to_utf8(d, tbl[0x100+rlen]); - } - } - } - else if ((ch = tbl[comp]) >= 0) { - d = uvchr_to_utf8(d, ch); - matches++; - } - else if (ch == -1) { /* -1 is unmapped character */ - Move(s, d, len, U8); - d += len; - } - else if (ch == -2) /* -2 is delete character */ - matches++; - s += len; - } - } + if (grows) { sv_setpvn(sv, (char*)dstart, d - dstart); Safefree(dstart); @@ -332,7 +283,7 @@ S_do_trans_complex(pTHX_ SV * const sv) * or may not be utf8. */ -STATIC I32 +STATIC Size_t S_do_trans_simple_utf8(pTHX_ SV * const sv) { U8 *s; @@ -340,8 +291,8 @@ S_do_trans_simple_utf8(pTHX_ SV * const sv) U8 *d; U8 *start; U8 *dstart, *dend; - I32 matches = 0; - const I32 grows = PL_op->op_private & OPpTRANS_GROWS; + Size_t matches = 0; + const bool grows = cBOOL(PL_op->op_private & OPpTRANS_GROWS); STRLEN len; SV* const rv = #ifdef USE_ITHREADS @@ -441,13 +392,13 @@ S_do_trans_simple_utf8(pTHX_ SV * const sv) * or may not be utf8. */ -STATIC I32 +STATIC Size_t S_do_trans_count_utf8(pTHX_ SV * const sv) { const U8 *s; const U8 *start = NULL; const U8 *send; - I32 matches = 0; + Size_t matches = 0; STRLEN len; SV* const rv = #ifdef USE_ITHREADS @@ -492,15 +443,15 @@ S_do_trans_count_utf8(pTHX_ SV * const sv) * or may not be utf8. */ -STATIC I32 +STATIC Size_t S_do_trans_complex_utf8(pTHX_ SV * const sv) { U8 *start, *send; U8 *d; - I32 matches = 0; - const I32 squash = PL_op->op_private & OPpTRANS_SQUASH; - const I32 del = PL_op->op_private & OPpTRANS_DELETE; - const I32 grows = PL_op->op_private & OPpTRANS_GROWS; + Size_t matches = 0; + const bool squash = cBOOL(PL_op->op_private & OPpTRANS_SQUASH); + const bool del = cBOOL(PL_op->op_private & OPpTRANS_DELETE); + const bool grows = cBOOL(PL_op->op_private & OPpTRANS_GROWS); SV* const rv = #ifdef USE_ITHREADS PAD_SVl(cPADOP->op_padix); @@ -660,12 +611,12 @@ S_do_trans_complex_utf8(pTHX_ SV * const sv) * Returns a count of number of characters translated */ -I32 +Size_t Perl_do_trans(pTHX_ SV *sv) { STRLEN len; - const I32 flags = PL_op->op_private; - const I32 hasutf = flags & (OPpTRANS_FROM_UTF | OPpTRANS_TO_UTF); + const U8 flags = PL_op->op_private; + const U8 hasutf = flags & (OPpTRANS_FROM_UTF | OPpTRANS_TO_UTF); PERL_ARGS_ASSERT_DO_TRANS; @@ -1067,7 +1018,7 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) STRLEN rightlen; const char *lc; const char *rc; - STRLEN len; + STRLEN len = 0; STRLEN lensave; const char *lsave; const char *rsave; @@ -1144,20 +1095,25 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) * portion. That means that at least one of the operands has to be * entirely non-UTF-8, and the length of that operand has to be before the * first above-FF in the other */ - if (left_utf8) { - if (right_utf8 || rightlen > leftlen) { - Perl_croak(aTHX_ fatal_above_ff_msg, PL_op_desc[optype]); + if (left_utf8 || right_utf8) { + if (left_utf8) { + if (right_utf8 || rightlen > leftlen) { + Perl_croak(aTHX_ fatal_above_ff_msg, PL_op_desc[optype]); + } + len = rightlen; } - len = rightlen; - } - else if (right_utf8) { - if (leftlen > rightlen) { - Perl_croak(aTHX_ fatal_above_ff_msg, PL_op_desc[optype]); + else if (right_utf8) { + if (leftlen > rightlen) { + Perl_croak(aTHX_ fatal_above_ff_msg, PL_op_desc[optype]); + } + len = leftlen; } - len = leftlen; + + Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED), + deprecated_above_ff_msg, PL_op_desc[optype]); } else { /* Neither is UTF-8 */ - len = leftlen < rightlen ? leftlen : rightlen; + len = MIN(leftlen, rightlen); } lensave = len;