#define get_invlist_iter_addr S_get_invlist_iter_addr
#define grok_bslash_N(a,b,c,d,e,f,g) S_grok_bslash_N(aTHX_ a,b,c,d,e,f,g)
#define handle_named_backref(a,b,c,d) S_handle_named_backref(aTHX_ a,b,c,d)
-#define handle_possible_posix(a,b,c,d) S_handle_possible_posix(aTHX_ a,b,c,d)
+#define handle_possible_posix(a,b,c,d,e) S_handle_possible_posix(aTHX_ a,b,c,d,e)
#define handle_regex_sets(a,b,c,d,e) S_handle_regex_sets(aTHX_ a,b,c,d,e)
#define invlist_clear(a) S_invlist_clear(aTHX_ a)
#define invlist_clone(a) S_invlist_clone(aTHX_ a)
#define join_exact(a,b,c,d,e,f,g) S_join_exact(aTHX_ a,b,c,d,e,f,g)
#define make_trie(a,b,c,d,e,f,g,h) S_make_trie(aTHX_ a,b,c,d,e,f,g,h)
#define nextchar(a) S_nextchar(aTHX_ a)
+#define output_or_return_posix_warnings(a,b,c) S_output_or_return_posix_warnings(aTHX_ a,b,c)
#define parse_lparen_question_flags(a) S_parse_lparen_question_flags(aTHX_ a)
#define populate_ANYOF_from_invlist(a,b) S_populate_ANYOF_from_invlist(aTHX_ a,b)
#define reg(a,b,c,d) S_reg(aTHX_ a,b,c,d)
/* 'posix_warnings' and 'warn_text' are names of variables in the following
* routine. q.v. */
#define ADD_POSIX_WARNING(p, text) STMT_START { \
- if (posix_warnings && ( posix_warnings != (AV **) -1 \
- || (PASS2 && ckWARN(WARN_REGEXP)))) \
- { \
+ if (posix_warnings) { \
if (! warn_text) warn_text = newAV(); \
av_push(warn_text, Perl_newSVpvf(aTHX_ \
WARNING_PREFIX \
besides RExC_parse. */
char ** updated_parse_ptr, /* Where to set the updated parse pointer, or
NULL */
- AV ** posix_warnings /* Where to place any generated warnings, or -1
- if to output them, or NULL */
+ AV ** posix_warnings, /* Where to place any generated warnings, or
+ NULL */
+ const bool check_only /* Don't die if error */
)
{
/* This parses what the caller thinks may be one of the three POSIX
* 'updated_parse_ptr' is not changed. No warnings nor errors are
* raised.
*
- * In b) there may be warnings and even errors generated. What to do about
- * these is determined by the 'posix_warnings' parameter. If it is NULL,
- * this call is treated as a check-only, scouting-out-the-territory call,
- * and no warnings nor errors are generated at all. Otherwise, any errors
- * are raised if found. If 'posix_warnings' is -1 (appropriately cast),
- * warnings are generated and displayed (in pass 2), just as they would be
- * for any other message of the same type from this file. If it isn't NULL
- * and not -1, warnings aren't displayed, but instead an AV is generated
- * with all the warning messages (that aren't to be ignored) stored into
- * it, so that the caller can output them if it wants. This is done in all
+ * In b) there may be errors or warnings generated. If 'check_only' is
+ * TRUE, then any errors are discarded. Warnings are returned to the
+ * caller via an AV* created into '*posix_warnings' if it is not NULL. If
+ * instead it is NULL, warnings are suppressed. This is done in all
* passes. The reason for this is that the rest of the parsing is heavily
* dependent on whether this routine found a valid posix class or not. If
* it did, the closing ']' is absorbed as part of the class. If no class,
temp_ptr++;
if (*temp_ptr == ']') {
temp_ptr++;
- if (! found_problem && posix_warnings) {
+ if (! found_problem && ! check_only) {
RExC_parse = (char *) temp_ptr;
vFAIL3("POSIX syntax [%c %c] is reserved for future "
"extensions", open_char, open_char);
}
if (warn_text) {
- if (posix_warnings != (AV **) -1) {
- *posix_warnings = warn_text;
+ if (posix_warnings) {
+ /* mortalize to avoid a leak with FATAL warnings */
+ *posix_warnings = (AV *) sv_2mortal((SV *) warn_text);
}
else {
- SV * msg;
- while ((msg = av_shift(warn_text)) != &PL_sv_undef) {
- Perl_warner(aTHX_ packWARN(WARN_REGEXP),
- "%s", SvPVX(msg));
- SvREFCNT_dec_NN(msg);
- }
SvREFCNT_dec_NN(warn_text);
}
}
* one */
return class_number + complement;
}
- else if (posix_warnings) {
+ else if (! check_only) {
/* Here, it is an unrecognized class. This is an error (unless the
* call is to check only, which we've already handled above) */
< handle_possible_posix(pRExC_state,
RExC_parse + 1,
NULL,
- NULL));
+ NULL,
+ TRUE /* checking only */));
/* If it is a posix class, leave the parse pointer at the
* '[' to fool regclass() into thinking it is part of a
* '[[:posix:]]'. */
no_close:
/* We output the messages even if warnings are off, because we'll fail
* the very next thing, and these give a likely diagnosis for that */
- if (posix_warnings) {
- SV * msg;
- while ((msg = av_shift(posix_warnings)) != &PL_sv_undef) {
- Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s", SvPVX(msg));
- SvREFCNT_dec_NN(msg);
- }
- SvREFCNT_dec_NN(posix_warnings);
+ if (posix_warnings && av_tindex(posix_warnings) >= 0) {
+ output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
}
FAIL("Syntax error in (?[...])");
< handle_possible_posix(pRExC_state,
RExC_parse + 1,
NULL,
- NULL));
+ NULL,
+ TRUE /* checking only */));
/* If it is a posix class, leave the parse pointer at the '['
* to fool regclass() into thinking it is part of a
* '[[:posix:]]'. */
}
}
+STATIC void
+S_output_or_return_posix_warnings(pTHX_ RExC_state_t *pRExC_state, AV* posix_warnings, AV** return_posix_warnings)
+{
+ /* Drain the array 'posix_warnings', shifting messages off it one at a
+ * time until av_shift() yields &PL_sv_undef. What is done with each
+ * message depends on the final parameter, 'return_posix_warnings':
+ *
+ * NULL: the message is output as a REGEXP warning through
+ * Perl_warner(), after which its reference count is
+ * decremented.
+ * non-NULL: the message is pushed onto '*return_posix_warnings'. If
+ * that pointer is currently NULL, a new mortal AV is created
+ * and stored there first.
+ *
+ * When outputting, and ckDEAD() reports the REGEXP category as fatal,
+ * extra clean-up is done just before the first message is emitted:
+ * 'posix_warnings' is av_undef()'d, the message is mortalized, and, in
+ * PASS2, SAVEFREESV() is applied to RExC_rx_sv. Presumably this is
+ * because Perl_warner() does not return in that situation -- confirm
+ * against the warner implementation.
+ *
+ * NOTE(review): 'pRExC_state' is not named directly in this body;
+ * presumably the PASS2 and RExC_rx_sv macros expand to uses of it --
+ * confirm against their definitions. */
+
+ SV * msg;
+ const bool first_is_fatal = ! return_posix_warnings
+ && ckDEAD(packWARN(WARN_REGEXP)); /* evaluated once, before the loop */
+
+ PERL_ARGS_ASSERT_OUTPUT_OR_RETURN_POSIX_WARNINGS;
+
+ while ((msg = av_shift(posix_warnings)) != &PL_sv_undef) { /* empties the input array as it goes */
+ if (return_posix_warnings) {
+ if (! *return_posix_warnings) { /* mortalize to not leak if
+ warnings are fatal */
+ *return_posix_warnings = (AV *) sv_2mortal((SV *) newAV());
+ }
+ av_push(*return_posix_warnings, msg); /* no refcount decrement on this path */
+ }
+ else {
+ if (first_is_fatal) { /* Avoid leaking this */
+ av_undef(posix_warnings); /* This isn't necessary if the
+ array is mortal, but is a
+ fail-safe */
+ (void) sv_2mortal(msg);
+ if (PASS2) {
+ SAVEFREESV(RExC_rx_sv);
+ }
+ }
+ Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s", SvPVX(msg)); /* text passed as a "%s" argument, never as the format */
+ SvREFCNT_dec_NN(msg);
+ }
+ }
+}
+
STATIC AV *
S_add_multi_match(pTHX_ AV* multi_char_matches, SV* multi_string, const STRLEN cp_count)
{
* input. Only after this position is reached do we check again */
char *not_posix_region_end = RExC_parse - 1;
+ AV* posix_warnings = NULL;
+ const bool do_posix_warnings = return_posix_warnings
+ || (PASS2 && ckWARN(WARN_REGEXP));
+
GET_RE_DEBUG_FLAGS_DECL;
PERL_ARGS_ASSERT_REGCLASS;
allow_multi_folds = FALSE;
#endif
- if (return_posix_warnings == NULL) {
- return_posix_warnings = (AV **) -1;
- }
-
/* Assume we are going to generate an ANYOF node. */
ret = reganode(pRExC_state,
(LOC)
/* Check that they didn't say [:posix:] instead of [[:posix:]] */
if (! ret_invlist && MAYBE_POSIXCC(UCHARAT(RExC_parse))) {
- char *class_end;
int maybe_class = handle_possible_posix(pRExC_state,
RExC_parse,
- &class_end,
- NULL);
- if (maybe_class >= OOB_NAMEDCLASS) {
- not_posix_region_end = class_end;
- if (PASS2 && return_posix_warnings == (AV **) -1) {
- SAVEFREESV(RExC_rx_sv);
- ckWARN4reg(class_end,
- "POSIX syntax [%c %c] belongs inside character classes%s",
- *RExC_parse, *RExC_parse,
- (maybe_class == OOB_NAMEDCLASS)
- ? ((POSIXCC_NOTYET(*RExC_parse))
- ? " (but this one isn't implemented)"
- : " (but this one isn't fully valid)")
- : ""
- );
- (void)ReREFCNT_inc(RExC_rx_sv);
- }
- }
+ &not_posix_region_end,
+ NULL,
+ TRUE /* checking only */);
+ if (PASS2 && maybe_class >= OOB_NAMEDCLASS && do_posix_warnings) {
+ SAVEFREESV(RExC_rx_sv);
+ ckWARN4reg(not_posix_region_end,
+ "POSIX syntax [%c %c] belongs inside character classes%s",
+ *RExC_parse, *RExC_parse,
+ (maybe_class == OOB_NAMEDCLASS)
+ ? ((POSIXCC_NOTYET(*RExC_parse))
+ ? " (but this one isn't implemented)"
+ : " (but this one isn't fully valid)")
+ : ""
+ );
+ (void)ReREFCNT_inc(RExC_rx_sv);
+ }
}
/* If the caller wants us to just parse a single element, accomplish this
goto charclassloop;
while (1) {
+
+ if ( posix_warnings
+ && av_tindex(posix_warnings) >= 0
+ && RExC_parse > not_posix_region_end)
+ {
+ /* Warnings about posix class issues are considered tentative until
+ * we are far enough along in the parse that we can no longer
+ * change our mind, at which point we either output them or add
+ * them, if it has so specified, to what gets returned to the
+ * caller. This is done each time through the loop so that a later
+ * class won't zap them before they have been dealt with. */
+ output_or_return_posix_warnings(pRExC_state, posix_warnings,
+ return_posix_warnings);
+ }
+
if (RExC_parse >= stop_ptr) {
break;
}
value = UCHARAT(RExC_parse++);
if (value == '[') {
+ char * posix_class_end;
namedclass = handle_possible_posix(pRExC_state,
RExC_parse,
- &not_posix_region_end,
- return_posix_warnings);
+ &posix_class_end,
+ do_posix_warnings ? &posix_warnings : NULL,
+ FALSE /* die if error */);
if (namedclass > OOB_NAMEDCLASS) {
- RExC_parse = not_posix_region_end;
+
+ /* If there was an earlier attempt to parse this particular
+ * posix class, and it failed, it was a false alarm, as this
+ * successful one proves */
+ if ( posix_warnings
+ && av_tindex(posix_warnings) >= 0
+ && not_posix_region_end >= RExC_parse
+ && not_posix_region_end <= posix_class_end)
+ {
+ av_undef(posix_warnings);
+ }
+
+ RExC_parse = posix_class_end;
+ }
+ else if (namedclass == OOB_NAMEDCLASS) {
+ not_posix_region_end = posix_class_end;
}
else {
namedclass = OOB_NAMEDCLASS;
RExC_parse - 1, /* -1 because parse has already been
advanced */
&not_posix_region_end,
- return_posix_warnings);
+ do_posix_warnings ? &posix_warnings : NULL,
+ TRUE /* checking only */);
}
else if (value == '\\') {
/* Is a backslash; get the code point of the char after it */
range = 0; /* this range (if it was one) is done now */
} /* End of loop through all the text within the brackets */
+
+ if ( posix_warnings && av_tindex(posix_warnings) >= 0) {
+ output_or_return_posix_warnings(pRExC_state, posix_warnings,
+ return_posix_warnings);
+ }
+
/* If anything in the class expands to more than one character, we have to
* deal with them by building up a substitute parse string, and recursively
* calling reg() on it, instead of proceeding */