From 430f723e6dceffbacbd1d7160d35ee8a2b698448 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Fri, 16 Oct 2020 21:08:13 -0600 Subject: [PATCH] Fix up delimcpy_no_escape() I modified this function in ab01742544b98b5b5e13d8e1a6e9df474b9e3005, and did not fully understand the edge cases. This commit now handles those properly, the same as plain delimcpy() does. --- embed.fnc | 6 ++-- embed.h | 2 +- ext/XS-APItest/APItest.xs | 27 ++++++++++++++++++ ext/XS-APItest/t/delimcpy.t | 34 ++++++++++++----------- proto.h | 4 +-- util.c | 67 +++++++++++++++++++++++++++------------------ 6 files changed, 91 insertions(+), 49 deletions(-) diff --git a/embed.fnc b/embed.fnc index e088cd8..5a30f46 100644 --- a/embed.fnc +++ b/embed.fnc @@ -756,10 +756,10 @@ pR |SV * |defelem_target |NN SV *sv|NULLOK MAGIC *mg ATpd |char* |delimcpy|NN char* to|NN const char* to_end \ |NN const char* from|NN const char* from_end \ |const int delim|NN I32* retlen -Tpd |char* |delimcpy_no_escape|NN char* to|NN const char* toend \ +ETpd |char* |delimcpy_no_escape|NN char* to|NN const char* to_end \ |NN const char* from \ - |NN const char* fromend|int delim \ - |NN I32* retlen + |NN const char* from_end \ + |const int delim|NN I32* retlen : Used in op.c, perl.c px |void |delete_eval_scope Aprd |OP* |die_sv |NN SV *baseex diff --git a/embed.h b/embed.h index b53d015..2cc6934 100644 --- a/embed.h +++ b/embed.h @@ -927,6 +927,7 @@ #define cntrl_to_mnemonic Perl_cntrl_to_mnemonic #define current_re_engine() Perl_current_re_engine(aTHX) #define cv_ckproto_len_flags(a,b,c,d,e) Perl_cv_ckproto_len_flags(aTHX_ a,b,c,d,e) +#define delimcpy_no_escape Perl_delimcpy_no_escape #define do_uniprop_match Perl_do_uniprop_match #define get_and_check_backslash_N_name(a,b,c,d) Perl_get_and_check_backslash_N_name(aTHX_ a,b,c,d) #define get_deprecated_property_msg Perl_get_deprecated_property_msg @@ -1282,7 +1283,6 @@ #define deb_stack_all() Perl_deb_stack_all(aTHX) #define defelem_target(a,b) Perl_defelem_target(aTHX_ a,b) #define delete_eval_scope() Perl_delete_eval_scope(aTHX) -#define delimcpy_no_escape Perl_delimcpy_no_escape #define die_unwind(a) Perl_die_unwind(aTHX_ a) #define do_aexec5(a,b,c,d,e) Perl_do_aexec5(aTHX_ a,b,c,d,e) #define do_dump_pad(a,b,c,d) Perl_do_dump_pad(aTHX_ a,b,c,d) diff --git a/ext/XS-APItest/APItest.xs b/ext/XS-APItest/APItest.xs index cc31a39..549bf54 100644 --- a/ext/XS-APItest/APItest.xs +++ b/ext/XS-APItest/APItest.xs @@ -6821,6 +6821,33 @@ test_delimcpy(SV * from_sv, STRLEN trunc_from, char delim, STRLEN to_len, STRLEN OUTPUT: RETVAL +AV * +test_delimcpy_no_escape(SV * from_sv, STRLEN trunc_from, char delim, STRLEN to_len, STRLEN trunc_to, char poison = '?') + PREINIT: + char * from; + AV *av; + I32 retlen; + char * from_pos_after_copy; + char * to; + CODE: + from = SvPV_nolen(from_sv); + Newx(to, to_len, char); + PoisonWith(to, to_len, char, poison); + assert(trunc_from <= SvCUR(from_sv)); + /* trunc_to allows us to throttle the output size available */ + assert(trunc_to <= to_len); + from_pos_after_copy = delimcpy_no_escape(to, to + trunc_to, + from, from + trunc_from, + delim, &retlen); + av = newAV(); + av_push(av, newSVpvn(to, to_len)); + av_push(av, newSVuv(retlen)); + av_push(av, newSVuv(from_pos_after_copy - from)); + Safefree(to); + RETVAL = av; + OUTPUT: + RETVAL + SV * test_Gconvert(SV * number, SV * num_digits) PREINIT: diff --git a/ext/XS-APItest/t/delimcpy.t b/ext/XS-APItest/t/delimcpy.t index 9d2c7d1..6ea3c43 100644 --- a/ext/XS-APItest/t/delimcpy.t +++ b/ext/XS-APItest/t/delimcpy.t @@ -132,25 +132,26 @@ foreach my $d ("x", "\0") { # Try both printable and NUL delimiters } while ($trunc_dest_len > 0); } -{ - # Repeat a few of the tests with a backslash delimiter, which means there - # is no possibiliby of an escape +# Repeat a few of the tests with a backslash delimiter, which means there +# is no possibiliby of an escape. And this escape-less form can be used to +# also do a general test on 'delimcpy_no_escape' +foreach my $d ("x", "\0", '\\') { + for my $func (qw(delimcpy delimcpy_no_escape)) { + next if $func eq 'delimcpy' && $d ne '\\'; + my $test_func = "test_$func"; - my $d = "\\"; my $source = $ib; my $source_len = 1; my $should_be = $source; - pass 'delimiter is a backslash for the rest of the tests'; - - $ret = test_delimcpy($source, $source_len, $d, $source_len, $source_len, $poison); + $ret = eval "$test_func(\$source, \$source_len, \$d, \$source_len, \$source_len, \$poison)"; is($ret->[0], expected($source, $source_len, $poison, $source_len), - "delimcpy works when there is no delimiter at all"); + "$func works when there is no delimiter at all"); is($ret->[1], $source_len, "Destination length is correct"); is($ret->[2], 1, "Source advance is correct"); $source .= $d; - $ret = test_delimcpy($source, $source_len, $d, $source_len, $source_len, $poison); + $ret = eval "$test_func(\$source, \$source_len, \$d, \$source_len, \$source_len, \$poison)"; is($ret->[0], expected($source, $source_len, $poison, $source_len), "Works when delimiter is just beyond the examined portion"); is($ret->[1], $source_len, "Destination length is correct"); @@ -158,7 +159,7 @@ foreach my $d ("x", "\0") { # Try both printable and NUL delimiters # Delimiter in first byte my $actual_dest_len = 5; - $ret = test_delimcpy($d, 1, $d, $actual_dest_len, $actual_dest_len, $poison); + $ret = eval "$test_func(\$d, 1, \$d, \$actual_dest_len, \$actual_dest_len, \$poison)"; is($ret->[0], "\0" . $poison x ($actual_dest_len - 1), "Copied correctly when delimiter is first character"); is($ret->[1], 0, "0 bytes copied"); @@ -169,23 +170,24 @@ foreach my $d ("x", "\0") { # Try both printable and NUL delimiters my $with_NULL = $source . "\0"; $source .= $d . ($ib x 7); $source_len = length $source; - $ret = test_delimcpy($source, $source_len, $d, $source_len, $source_len, $poison); + $ret = eval "$test_func(\$source, \$source_len, \$d, \$source_len, \$source_len, \$poison)"; is($ret->[0], expected($with_NULL, $len_sans_delim + 1, $poison, $source_len), - "delimcpy works when delim is in middle of source, plenty of room"); + "$func works when delim is in middle of source, plenty of room"); is($ret->[1], $len_sans_delim, "Destination length is correct"); is($ret->[2], $len_sans_delim, "Source advance is correct"); - $ret = test_delimcpy($source, $source_len, $d, $source_len, $len_sans_delim, $poison); + $ret = eval "$test_func(\$source, \$source_len, \$d, \$source_len, \$len_sans_delim, \$poison)"; is($ret->[0], expected($source, $len_sans_delim, $poison, $source_len), - "delimcpy works when delim is in middle of source; no room for safety NUL"); + "$func works when delim is in middle of source; no room for safety NUL"); is($ret->[1], $len_sans_delim, "Destination length is correct"); is($ret->[2], $len_sans_delim, "Source advance is correct"); - $ret = test_delimcpy($source, $source_len, $d, $source_len, $len_sans_delim - 1, $poison); + $ret = eval "$test_func(\$source, \$source_len, \$d, \$source_len, \$len_sans_delim - 1, \$poison)"; is($ret->[0], expected($source, $len_sans_delim - 1, $poison, $source_len), - "delimcpy works when not enough space for copy"); + "$func works when not enough space for copy"); is($ret->[1], $failure_return, "Destination length is correct"); is($ret->[2], $len_sans_delim, "Source advance is correct"); + } } done_testing(); diff --git a/proto.h b/proto.h index 4abb976..37393e1 100644 --- a/proto.h +++ b/proto.h @@ -840,9 +840,9 @@ PERL_CALLCONV void Perl_delete_eval_scope(pTHX); PERL_CALLCONV char* Perl_delimcpy(char* to, const char* to_end, const char* from, const char* from_end, const int delim, I32* retlen); #define PERL_ARGS_ASSERT_DELIMCPY \ assert(to); assert(to_end); assert(from); assert(from_end); assert(retlen) -PERL_CALLCONV char* Perl_delimcpy_no_escape(char* to, const char* toend, const char* from, const char* fromend, int delim, I32* retlen); +PERL_CALLCONV char* Perl_delimcpy_no_escape(char* to, const char* to_end, const char* from, const char* from_end, const int delim, I32* retlen); #define PERL_ARGS_ASSERT_DELIMCPY_NO_ESCAPE \ - assert(to); assert(toend); assert(from); assert(fromend); assert(retlen) + assert(to); assert(to_end); assert(from); assert(from_end); assert(retlen) PERL_CALLCONV void Perl_despatch_signals(pTHX); #define PERL_ARGS_ASSERT_DESPATCH_SIGNALS PERL_CALLCONV_NO_RET OP* Perl_die(pTHX_ const char* pat, ...) diff --git a/util.c b/util.c index 5d2d1ba..b25a696 100644 --- a/util.c +++ b/util.c @@ -546,51 +546,64 @@ Free_t Perl_mfree (Malloc_t where) =for apidoc delimcpy_no_escape Copy a source buffer to a destination buffer, stopping at (but not including) -the first occurrence of the delimiter byte C, in the source. The source -is the bytes between C and C inclusive. The dest is C -through C. +the first occurrence in the source of the delimiter byte, C. The source +is the bytes between S and C - 1>. Similarly, the dest is +C up to C. -Nothing is copied beyond what fits between C through C. If C -doesn't occur in the source buffer, as much of the source as will fit is copied -to the destination. +The number of bytes copied is written to C<*retlen>. -The actual number of bytes copied is written to C<*retlen>. +Returns the position of C in the C buffer, but if there is no +such occurrence before C, then C is returned, and the entire +buffer S .. C - 1> is copied. If there is room in the destination available after the copy, an extra -terminating safety NUL byte is written (not included in the returned length). +terminating safety C byte is appended (not included in the returned +length). + +The error case is if the destination buffer is not large enough to accommodate +everything that should be copied. In this situation, a value larger than +S - C> is written to C<*retlen>, and as much of the source as +fits will be written to the destination. Not having room for the safety C +is not considered an error. =cut */ char * -Perl_delimcpy_no_escape(char *to, const char *toend, const char *from, - const char *fromend, int delim, I32 *retlen) +Perl_delimcpy_no_escape(char *to, const char *to_end, + const char *from, const char *from_end, + const int delim, I32 *retlen) { const char * delim_pos; - Ptrdiff_t to_len = toend - to; - - /* Only use the minimum of the available source/dest */ - Ptrdiff_t copy_len = MIN(fromend - from, to_len); + Ptrdiff_t from_len = from_end - from; + Ptrdiff_t to_len = to_end - to; + SSize_t copy_len; PERL_ARGS_ASSERT_DELIMCPY_NO_ESCAPE; - assert(copy_len >= 0); + assert(from_len >= 0); + assert(to_len >= 0); - /* Look for the first delimiter in the portion of the source we are allowed - * to look at (determined by the input bounds). */ - delim_pos = (const char *) memchr(from, delim, copy_len); - if (delim_pos) { - copy_len = delim_pos - from; - } /* else didn't find it: copy all of the source permitted */ + /* Look for the first delimiter in the source */ + delim_pos = (const char *) memchr(from, delim, from_len); - Copy(from, to, copy_len, char); + /* Copy up to where the delimiter was found, or the entire buffer if not + * found */ + copy_len = (delim_pos) ? delim_pos - from : from_len; - if (retlen) { - *retlen = copy_len; + /* If not enough room, copy as much as can fit, and set error return */ + if (copy_len > to_len) { + Copy(from, to, to_len, char); + *retlen = DELIMCPY_OUT_OF_BOUNDS_RET; } + else { + Copy(from, to, copy_len, char); - /* If there is extra space available, add a trailing NUL */ - if (copy_len < to_len) { - to[copy_len] = '\0'; + /* If there is extra space available, add a trailing NUL */ + if (copy_len < to_len) { + to[copy_len] = '\0'; + } + + *retlen = copy_len; } return (char *) from + copy_len; -- 1.8.3.1