From 25c7fb78daaad4f8ca1aaa9e19d6db99366388c7 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 12 Jan 2017 14:50:26 -0700 Subject: [PATCH] toke.c: Allow \N{} handling fcn to be used elsewhere in core This function will be used in regcomp.c in a later commit. This commit changes the function so that it is callable outside of toke.c. It adds a parameter and moves some code in new_constant to the wrapper function so that these do not cause problems when called from outside toke. And it adds some assertions --- embed.fnc | 7 ++++--- embed.h | 2 +- proto.h | 10 +++++----- toke.c | 40 +++++++++++++++++++++++++--------------- 4 files changed, 35 insertions(+), 24 deletions(-) diff --git a/embed.fnc b/embed.fnc index a976e8c..517bcb5 100644 --- a/embed.fnc +++ b/embed.fnc @@ -2736,6 +2736,10 @@ s |void |anonymise_cv_maybe |NN GV *gv|NN CV *cv : Used in sv.c and hv.c po |void * |more_bodies |const svtype sv_type|const size_t body_size \ |const size_t arena_size +EXpR |SV* |get_and_check_backslash_N_name|NN const char* s \ + |NN const char* const e \ + |const bool is_utf8 \ + |NN const char** error_msg #if defined(PERL_IN_TOKE_C) s |void |check_uni @@ -2746,9 +2750,6 @@ s |char* |force_word |NN char *start|int token|int check_keyword \ |int allow_pack s |SV* |tokeq |NN SV *sv sR |char* |scan_const |NN char *start -sR |SV* |get_and_check_backslash_N_name|NN const char* s \ - |NN const char* const e \ - |NN const char** error_msg sR |SV* |get_and_check_backslash_N_name_wrapper|NN const char* s \ |NN const char* const e sR |char* |scan_formline |NN char *s diff --git a/embed.h b/embed.h index 827974b..5e87348 100644 --- a/embed.h +++ b/embed.h @@ -1086,6 +1086,7 @@ #define av_reify(a) Perl_av_reify(aTHX_ a) #define current_re_engine() Perl_current_re_engine(aTHX) #define cv_ckproto_len_flags(a,b,c,d,e) Perl_cv_ckproto_len_flags(aTHX_ a,b,c,d,e) +#define get_and_check_backslash_N_name(a,b,c,d) Perl_get_and_check_backslash_N_name(aTHX_ a,b,c,d) #define grok_atoUV Perl_grok_atoUV #define mg_find_mglob(a) Perl_mg_find_mglob(aTHX_ a) #define multiconcat_stringify(a) Perl_multiconcat_stringify(aTHX_ a) @@ -2030,7 +2031,6 @@ #define force_strict_version(a) S_force_strict_version(aTHX_ a) #define force_version(a,b) S_force_version(aTHX_ a,b) #define force_word(a,b,c,d) S_force_word(aTHX_ a,b,c,d) -#define get_and_check_backslash_N_name(a,b,c) S_get_and_check_backslash_N_name(aTHX_ a,b,c) #define get_and_check_backslash_N_name_wrapper(a,b) S_get_and_check_backslash_N_name_wrapper(aTHX_ a,b) #define incline(a,b) S_incline(aTHX_ a,b) #define intuit_method(a,b,c) S_intuit_method(aTHX_ a,b,c) diff --git a/proto.h b/proto.h index ddaea62..936d344 100644 --- a/proto.h +++ b/proto.h @@ -972,6 +972,11 @@ PERL_CALLCONV char* Perl_form(pTHX_ const char* pat, ...) PERL_CALLCONV void Perl_free_tied_hv_pool(pTHX); PERL_CALLCONV void Perl_free_tmps(pTHX); +PERL_CALLCONV SV* Perl_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e, const bool is_utf8, const char** error_msg) + __attribute__warn_unused_result__; +#define PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME \ + assert(s); assert(e); assert(error_msg) + PERL_CALLCONV AV* Perl_get_av(pTHX_ const char *name, I32 flags); #define PERL_ARGS_ASSERT_GET_AV \ assert(name) @@ -6054,11 +6059,6 @@ STATIC char* S_force_version(pTHX_ char *s, int guessing); STATIC char* S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack); #define PERL_ARGS_ASSERT_FORCE_WORD \ assert(start) -STATIC SV* S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e, const char** error_msg) - __attribute__warn_unused_result__; -#define PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME \ - assert(s); assert(e); assert(error_msg) - STATIC SV* S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e) __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER \ diff --git a/toke.c b/toke.c index 98ea7ee..af3a5eb 100644 --- a/toke.c +++ b/toke.c @@ -2597,10 +2597,17 @@ S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const * message it returns. */ const char * error_msg = NULL; - SV * result = get_and_check_backslash_N_name(s, e, &error_msg); + SV * result; PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER; + /* charnames doesn't work well if there have been errors found */ + if (PL_error_count > 0) { + return NULL; + } + + result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg); + if (error_msg) { yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0); } @@ -2608,25 +2615,35 @@ S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const return result; } -STATIC SV* -S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e, const char ** error_msg) +SV* +Perl_get_and_check_backslash_N_name(pTHX_ const char* s, + const char* const e, + const bool is_utf8, + const char ** error_msg) { /* points to first character of interior of \N{}, to one beyond the * interior, hence to the "}". Finds what the name resolves to, returning - * an SV* containing it; NULL if no valid one found */ - - dVAR; - SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0); + * an SV* containing it; NULL if no valid one found. + * + * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it + * doesn't have to be. */ + SV* res; HV * table; SV **cvp; SV *cv; SV *rv; HV *stash; const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */ + dVAR; PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME; + assert(e >= s); + assert(s > (char *) 3); + + res = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0); + if (!SvCUR(res)) { SvREFCNT_dec_NN(res); /* diag_listed_as: Unknown charname '%s' */ @@ -2665,7 +2682,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e, const * characters that begin a character name alias are alphabetic, otherwise * would have to create a isCHARNAME_BEGIN macro */ - if (! UTF) { + if (! is_utf8) { if (! isALPHAU(*s)) { goto bad_charname; } @@ -9205,13 +9222,6 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, if (*key == 'c') { assert (strEQ(key, "charnames")); } assert(type || s); - /* charnames doesn't work well if there have been errors found */ - if (PL_error_count > 0 && *key == 'c') - { - SvREFCNT_dec_NN(sv); - return &PL_sv_undef; - } - sv_2mortal(sv); /* Parent created it permanently */ if (!table || ! (PL_hints & HINT_LOCALIZE_HH) -- 1.8.3.1