From 3955e1a9ae24737181ef9e4daba13179b936e4c9 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 1 Mar 2011 10:03:25 -0700 Subject: [PATCH] toke.c: Raise error for multiple regexp mods When the new regular expression modifiers being allowed in suffix-form were added on a very tight schedule, it was with the understanding that the error checking that only one can occur per regular experssion would be added later. This accomplishes that. --- pod/perldiag.pod | 12 ++++++++++++ t/lib/warnings/toke | 13 +++++++++++++ toke.c | 44 ++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 65 insertions(+), 4 deletions(-) diff --git a/pod/perldiag.pod b/pod/perldiag.pod index f5e115c..5efd095 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -4015,6 +4015,18 @@ discovered. (P) The regular expression engine got confused by what the regular expression compiler gave it. +=item Regexp modifier "/%c" may not appear twice + +(F syntax) The regular expression pattern had one of the +mutually exclusive modifiers repeated. Remove all but one of the +occurrences. + +=item Regexp modifiers "/%c" and "/%c" are mutually exclusive + +(F syntax) The regular expression pattern had more than one of the +mutually exclusive modifiers. Retain only the modifier that is +supposed to be there. + =item Regexp out of space (P) A "can't happen" error, because safemalloc() should have caught it diff --git a/t/lib/warnings/toke b/t/lib/warnings/toke index 425b613..59dc752 100644 --- a/t/lib/warnings/toke +++ b/t/lib/warnings/toke @@ -956,3 +956,16 @@ EXPECT "\c," is more clearly written simply as "l" at - line 4. "\c`" is more clearly written simply as "\ " at - line 5. "\c{" is deprecated and is more clearly written as ";" at - line 7. +######## +# toke.c +use warnings 'syntax' ; +my $a = qr/foo/du; +$a = qr/foo/laai; +$a = qr/foo/lil; +no warnings 'syntax' ; +my $a = qr/foo/du; +EXPECT +Regexp modifiers "/d" and "/u" are mutually exclusive at - line 3, near "= " +Regexp modifiers "/l" and "/a" are mutually exclusive at - line 4, near "= " +Regexp modifier "/l" may not appear twice at - line 5, near "= " +BEGIN not safe after errors--compilation aborted at - line 6. diff --git a/toke.c b/toke.c index e55b4b3..2dbe7f7 100644 --- a/toke.c +++ b/toke.c @@ -8774,13 +8774,17 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL } static bool -S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s) { +S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) { /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in * the parse starting at 's', based on the subset that are valid in this * context input to this routine in 'valid_flags'. Advances s. Returns * TRUE if the input was a valid flag, so the next char may be as well; - * otherwise FALSE */ + * otherwise FALSE. 'charset' should point to a NUL upon first call on the + * current regex. This routine will set it to any charset modifier found. + * The caller shouldn't change it. This way, another charset modifier + * encountered in the parse can be detected as an error, as we have decided + * allow only one */ const char c = **s; @@ -8828,7 +8832,11 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s) { "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'. In Perl 5.16, it will be resolved the other way"); return FALSE; } + if (*charset) { + goto multiple_charsets; + } set_regex_charset(pmfl, REGEX_LOCALE_CHARSET); + *charset = c; break; case UNICODE_PAT_MOD: /* In 5.14, qr//unless and qr//until are legal but deprecated; the @@ -8836,7 +8844,11 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s) { if (*((*s) + 1) == 'n') { goto deprecate; } + if (*charset) { + goto multiple_charsets; + } set_regex_charset(pmfl, REGEX_UNICODE_CHARSET); + *charset = c; break; case ASCII_RESTRICT_PAT_MOD: /* In 5.14, qr//and is legal but deprecated; the 'n' means they @@ -8852,9 +8864,18 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s) { else { set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET); } + if (*charset) { /* Do this after the increment of *s in /aa, so + the return advances the ptr correctly */ + goto multiple_charsets; + } + *charset = c; break; case DEPENDS_PAT_MOD: + if (*charset) { + goto multiple_charsets; + } set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET); + *charset = c; break; } @@ -8865,6 +8886,18 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s) { Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "Having no space between pattern and following word is deprecated"); return FALSE; + + multiple_charsets: + if (*charset != c) { + yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c)); + } + else { + yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c)); + } + + /* Pretend that it worked, so will continue processing before dieing */ + (*s)++; + return TRUE; } STATIC char * @@ -8875,6 +8908,7 @@ S_scan_pat(pTHX_ char *start, I32 type) char *s = scan_str(start,!!PL_madskills,FALSE); const char * const valid_flags = (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS); + char charset = '\0'; /* character set modifier */ #ifdef PERL_MAD char *modstart; #endif @@ -8916,7 +8950,7 @@ S_scan_pat(pTHX_ char *start, I32 type) #ifdef PERL_MAD modstart = s; #endif - while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s)) {}; + while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {}; #ifdef PERL_MAD if (PL_madskills && modstart != s) { SV* tmptoken = newSVpvn(modstart, s - modstart); @@ -8943,6 +8977,7 @@ S_scan_subst(pTHX_ char *start) register PMOP *pm; I32 first_start; I32 es = 0; + char charset = '\0'; /* character set modifier */ #ifdef PERL_MAD char *modstart; #endif @@ -8995,7 +9030,8 @@ S_scan_subst(pTHX_ char *start) s++; es++; } - else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s)) { + else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset)) + { break; } } -- 1.8.3.1