return TRUE;
}
+const char *
+Perl_form_alien_digit_msg(pTHX_
+ const U8 which, /* 8 or 16 */
+ const STRLEN valids_len, /* length of input before first bad char */
+ const char * const first_bad, /* Ptr to that bad char */
+ const char * const send, /* End of input string */
+ const bool UTF, /* Is it in UTF-8? */
+ const bool braced) /* Is it enclosed in {} */
+{
+ /* Generate a mortal SV containing an appropriate warning message about
+ * alien characters found in an octal or hex constant given by the inputs,
+ * and return a pointer to that SV's string. The message looks like:
+ *
+ * Non-hex character '?' terminates \x early. Resolved as "\x{...}"
+ *
+ */
+
+ /* The usual worst case scenario: 2 chars to display per byte, plus \x{}
+ * (leading zeros could take up more space, and the scalar will
+ * automatically grow if necessary). Space for NUL is added by the newSV()
+ * function */
+ SV * display_char = newSV(2 * UTF8_MAXBYTES + 4);
+ SV * message_sv = sv_newmortal();
+ char symbol;
+
+ PERL_ARGS_ASSERT_FORM_ALIEN_DIGIT_MSG;
+ assert(which == 8 || which == 16);
+
+ /* Calculate the display form of the character */
+ if ( UVCHR_IS_INVARIANT(*first_bad)
+ || (UTF && isUTF8_CHAR((U8 *) first_bad, (U8 *) send)))
+ {
+ pv_uni_display(display_char, (U8 *) first_bad, UTF8SKIP(first_bad),
+ (STRLEN) -1, UNI_DISPLAY_QQ);
+ }
+ else { /* Is not UTF-8, or is illegal UTF-8. Show just the one byte */
+
+ /* It also isn't a UTF-8 invariant character, so no display shortcuts
+ * are available. Use \\x{...} */
+ Perl_sv_setpvf(aTHX_ display_char, "\\x{%02x}", *first_bad);
+ }
+
+ /* Ready to start building the message */
+ sv_setpvs(message_sv, "Non-");
+ if (which == 8) {
+ sv_catpvs(message_sv, "octal");
+ if (braced) {
+ symbol = 'o';
+ }
+ else {
+ symbol = '0'; /* \008, for example */
+ }
+ }
+ else {
+ sv_catpvs(message_sv, "hex");
+ symbol = 'x';
+ }
+ sv_catpvs(message_sv, " character ");
+
+ if (isPRINT(*first_bad)) {
+ sv_catpvs(message_sv, "'");
+ }
+ sv_catsv(message_sv, display_char);
+ if (isPRINT(*first_bad)) {
+ sv_catpvs(message_sv, "'");
+ }
+ Perl_sv_catpvf(aTHX_ message_sv, " terminates \\%c early. Resolved as "
+ "\"\\%c", symbol, symbol);
+ if (braced) {
+ sv_catpvs(message_sv, "{");
+ }
+
+ /* Octal constants have an extra leading 0, but \0 already includes that */
+ if (symbol == 'o' && valids_len < 3) {
+ sv_catpvs(message_sv, "0");
+ }
+ if (valids_len == 0) { /* No legal digits at all */
+ sv_catpvs(message_sv, "00");
+ }
+ else if (valids_len == 1) { /* Just one is legal */
+ sv_catpvs(message_sv, "0");
+ }
+ sv_catpvn(message_sv, first_bad - valids_len, valids_len);
+
+ if (braced) {
+ sv_catpvs(message_sv, "}");
+ }
+ else {
+ sv_catsv(message_sv, display_char);
+ }
+ sv_catpvs(message_sv, "\"");
+
+ SvREFCNT_dec_NN(display_char);
+
+ return SvPVX_const(message_sv);
+}
+
+const char *
+Perl_form_cp_too_large_msg(pTHX_
+ const U8 which, /* 8 or 16 */
+ const char * string, /* NULL, or the text that is supposed to
+ represent a code point */
+ const Size_t len, /* length of 'string' if not NULL; else 0 */
+ const UV cp) /* 0 if 'string' not NULL; else the too-large
+ code point */
+{
+ /* Generate a mortal SV containing an appropriate warning message about
+ * code points that are too large for this system, given by the inputs,
+ * and return a pointer to that SV's string. Either the text of the string
+ * to be converted to a code point is input, or a code point itself. The
+ * former is needed to accurately represent something that overflows.
+ *
+ * The message looks like:
+ *
+ * Use of code point %s is not allowed; the permissible max is %s
+ *
+ */
+
+ SV * message_sv = sv_newmortal();
+ const char * format;
+ const char * prefix;
+
+ PERL_ARGS_ASSERT_FORM_CP_TOO_LARGE_MSG;
+ assert(which == 8 || which == 16);
+
+ /* One but not both must be non-zero */
+ assert((string != NULL) ^ (cp != 0));
+ assert((string == NULL) || len);
+
+ if (which == 8) {
+ format = "%" UVof;
+ prefix = "0";
+ }
+ else {
+ format = "%" UVXf;
+ prefix = "0x";
+ }
+
+ Perl_sv_setpvf(aTHX_ message_sv, "Use of code point %s", prefix);
+ if (string) {
+ Perl_sv_catpvf(aTHX_ message_sv, "%.*s", (int) len, string);
+ }
+ else {
+ Perl_sv_catpvf(aTHX_ message_sv, format, cp);
+ }
+ Perl_sv_catpvf(aTHX_ message_sv, " is not allowed; the permissible max is %s", prefix);
+ Perl_sv_catpvf(aTHX_ message_sv, format, MAX_LEGAL_CP);
+
+ return SvPVX_const(message_sv);
+}
+
bool
Perl_grok_bslash_o(pTHX_ char **s, const char * const send, UV *uv,
const char** message,
- const bool output_warning, const bool strict,
+ U32 * packed_warn,
+ const bool strict,
+ const bool allow_UV_MAX,
const bool UTF)
{
/* Documentation to be supplied when interface nailed down finally
- * This returns FALSE if there is an error which the caller need not recover
- * from; otherwise TRUE. In either case the caller should look at *len [???].
- * It guarantees that the returned codepoint, *uv, when expressed as
- * utf8 bytes, would fit within the skipped "\o{...}" bytes.
- * On input:
+ * This returns FALSE if there is an error the caller should probably die
+ * from; otherwise TRUE.
* s is the address of a pointer to a string. **s is 'o', and the
* previous character was a backslash. At exit, *s will be advanced
* to the byte just after those absorbed by this function. Hence the
- * caller can continue parsing from there. In the case of an error,
- * this routine has generally positioned *s to point just to the right
- * of the first bad spot, so that a message that has a "<--" to mark
- * the spot will be correctly positioned.
+ * caller can continue parsing from there. In the case of an error
+ * when this function returns FALSE, continuing to parse is not an
+ * option, this routine has generally positioned *s to point just to
+ * the right of the first bad spot, so that a message that has a "<--"
+ * to mark the spot will be correctly positioned.
* send - 1 gives a limit in *s that this function is not permitted to
* look beyond. That is, the function may look at bytes only in the
* range *s..send-1
* uv points to a UV that will hold the output value, valid only if the
- * return from the function is TRUE
- * message is a pointer that will be set to an internal buffer giving an
- * error message upon failure (the return is FALSE). Untouched if
- * function succeeds
- * output_warning says whether to output any warning messages, or suppress
- * them
+ * return from the function is TRUE; may be changed from the input
+ * value even when FALSE is returned.
+ * message A pointer to any warning or error message will be stored into
+ * this pointer; NULL if none.
+ * packed_warn if NULL on input asks that this routine display any warning
+ * messages. Otherwise, if the function found a warning, the packed
+ * warning categories will be stored into *packed_warn (and the
+ * corresponding message text into *message); 0 if none.
* strict is true if this should fail instead of warn if there are
* non-octal digits within the braces
+ * allow_UV_MAX is true if this shouldn't fail if the input code point is
+ * UV_MAX, which is normally illegal, reserved for internal use.
* UTF is true iff the string *s is encoded in UTF-8.
*/
char* e;
STRLEN numbers_len;
I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
- | PERL_SCAN_DISALLOW_PREFIX
- | PERL_SCAN_SILENT_NON_PORTABLE
- | PERL_SCAN_SILENT_ILLDIGIT;
+ | PERL_SCAN_DISALLOW_PREFIX
+ | PERL_SCAN_SILENT_NON_PORTABLE
+ | PERL_SCAN_SILENT_ILLDIGIT
+ | PERL_SCAN_SILENT_OVERFLOW;
PERL_ARGS_ASSERT_GROK_BSLASH_O;
assert(*(*s - 1) == '\\');
assert(* *s == 'o');
+
+ *message = NULL;
+ if (packed_warn) *packed_warn = 0;
+
(*s)++;
if (send <= *s || **s != '{') {
sequence if not a digit) */
numbers_len = e - *s;
if (numbers_len == 0) {
- (*s)++; /* Move past the } */
+ (*s)++; /* Move past the '}' */
*message = "Empty \\o{}";
return FALSE;
}
*uv = grok_oct(*s, &numbers_len, &flags, NULL);
+ if (UNLIKELY( (flags & PERL_SCAN_GREATER_THAN_UV_MAX)
+ || (! allow_UV_MAX && *uv == UV_MAX)))
+ {
+ *message = form_cp_too_large_msg(8, *s, numbers_len, 0);
+ *s = e + 1;
+ return FALSE;
+ }
+
/* Note that if has non-octal, will ignore everything starting with that up
* to the '}' */
-
if (numbers_len != (STRLEN) (e - *s)) {
+ *s += numbers_len;
if (strict) {
- *s += numbers_len;
*s += (UTF) ? UTF8_SAFE_SKIP(*s, send) : 1;
*message = "Non-octal character";
return FALSE;
}
- else if (output_warning) {
- Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
- /* diag_listed_as: Non-octal character '%c'. Resolved as "%s" */
- "Non-octal character '%c'. Resolved as \"\\o{%.*s}\"",
- *(*s + numbers_len),
- (int) numbers_len,
- *s);
+
+ if (ckWARN(WARN_DIGIT)) {
+ const char * failure = form_alien_digit_msg(8, numbers_len, *s, send,
+ UTF, TRUE);
+ if (packed_warn) {
+ *message = failure;
+ *packed_warn = packWARN(WARN_DIGIT);
+ }
+ else {
+ Perl_warner(aTHX_ packWARN(WARN_DIGIT), "%s", failure);
+ }
}
}
}
bool
-Perl_grok_bslash_x(pTHX_ char **s, const char * const send, UV *uv,
+Perl_grok_bslash_x(pTHX_ char ** s, const char * const send, UV *uv,
const char** message,
- const bool output_warning, const bool strict,
+ U32 * packed_warn,
+ const bool strict,
+ const bool allow_UV_MAX,
const bool UTF)
{
/* Documentation to be supplied when interface nailed down finally
- * This returns FALSE if there is an error which the caller need not recover
+ * This returns FALSE if there is an error the caller should probably die
* from; otherwise TRUE.
* It guarantees that the returned codepoint, *uv, when expressed as
* utf8 bytes, would fit within the skipped "\x{...}" bytes.
* look beyond. That is, the function may look at bytes only in the
* range *s..send-1
* uv points to a UV that will hold the output value, valid only if the
- * return from the function is TRUE
- * message is a pointer that will be set to an internal buffer giving an
- * error message upon failure (the return is FALSE). Untouched if
- * function succeeds
- * output_warning says whether to output any warning messages, or suppress
- * them
+ * return from the function is TRUE; may be changed from the input
+ * value even when FALSE is returned.
+ * message A pointer to any warning or error message will be stored into
+ * this pointer; NULL if none.
+ * packed_warn if NULL on input asks that this routine display any warning
+ * messages. Otherwise, if the function found a warning, the packed
+ * warning categories will be stored into *packed_warn (and the
+ * corresponding message text into *message); 0 if none.
* strict is true if anything out of the ordinary should cause this to
* fail instead of warn or be silent. For example, it requires
* exactly 2 digits following the \x (when there are no braces).
* 3 digits could be a mistake, so is forbidden in this mode.
+ * allow_UV_MAX is true if this shouldn't fail if the input code point is
+ * UV_MAX, which is normally illegal, reserved for internal use.
* UTF is true iff the string *s is encoded in UTF-8.
*/
char* e;
STRLEN numbers_len;
I32 flags = PERL_SCAN_DISALLOW_PREFIX
- | PERL_SCAN_SILENT_NON_PORTABLE;
-
+ | PERL_SCAN_SILENT_ILLDIGIT
+ | PERL_SCAN_NOTIFY_ILLDIGIT
+ | PERL_SCAN_SILENT_NON_PORTABLE
+ | PERL_SCAN_SILENT_OVERFLOW;
PERL_ARGS_ASSERT_GROK_BSLASH_X;
assert(*(*s - 1) == '\\');
assert(* *s == 'x');
+ *message = NULL;
+ if (packed_warn) *packed_warn = 0;
+
(*s)++;
if (send <= *s) {
return TRUE;
}
- if (strict || ! output_warning) {
- flags |= PERL_SCAN_SILENT_ILLDIGIT;
- }
-
if (**s != '{') {
- STRLEN len = (strict) ? 3 : 2;
-
- *uv = grok_hex(*s, &len, &flags, NULL);
- *s += len;
- if (strict && len != 2) {
- if (len < 2) {
- *s += (UTF) ? UTF8_SAFE_SKIP(*s, send) : 1;
- *message = "Non-hex character";
- }
- else {
+ numbers_len = (strict) ? 3 : 2;
+
+ *uv = grok_hex(*s, &numbers_len, &flags, NULL);
+ *s += numbers_len;
+
+ if (numbers_len != 2 && (strict || (flags & PERL_SCAN_NOTIFY_ILLDIGIT))) {
+ if (numbers_len == 3) { /* numbers_len 3 only happens with strict */
*message = "Use \\x{...} for more than two hex characters";
+ return FALSE;
+ }
+ else if (strict) {
+ *s += (UTF) ? UTF8_SAFE_SKIP(*s, send) : 1;
+ *message = "Non-hex character";
+ return FALSE;
+ }
+ else if (ckWARN(WARN_DIGIT)) {
+ const char * failure = form_alien_digit_msg(16, numbers_len, *s,
+ send, UTF, FALSE);
+
+ if (! packed_warn) {
+ Perl_warner(aTHX_ packWARN(WARN_DIGIT), "%s", failure);
+ }
+ else {
+ *message = failure;
+ *packed_warn = packWARN(WARN_DIGIT);
+ }
}
- return FALSE;
}
return TRUE;
}
e = (char *) memchr(*s, '}', send - *s);
if (!e) {
(*s)++; /* Move past the '{' */
- while (isXDIGIT(**s)) { /* Position beyond the legal digits */
+ while (*s < send && isXDIGIT(**s)) { /* Position beyond legal digits */
(*s)++;
}
/* XXX The corresponding message above for \o is just '\\o{'; other
flags |= PERL_SCAN_ALLOW_UNDERSCORES;
*uv = grok_hex(*s, &numbers_len, &flags, NULL);
- /* Note that if has non-hex, will ignore everything starting with that up
- * to the '}' */
+ if (UNLIKELY( (flags & PERL_SCAN_GREATER_THAN_UV_MAX)
+ || (! allow_UV_MAX && *uv == UV_MAX)))
+ {
+ *message = form_cp_too_large_msg(16, *s, numbers_len, 0);
+ *s = e + 1;
+ return FALSE;
+ }
- if (strict && numbers_len != (STRLEN) (e - *s)) {
+ if (numbers_len != (STRLEN) (e - *s)) {
*s += numbers_len;
- *s += (UTF) ? UTF8_SAFE_SKIP(*s, send) : 1;
- *message = "Non-hex character";
- return FALSE;
+ if (strict) {
+ *s += (UTF) ? UTF8_SAFE_SKIP(*s, send) : 1;
+ *message = "Non-hex character";
+ return FALSE;
+ }
+
+ if (ckWARN(WARN_DIGIT)) {
+ const char * failure = form_alien_digit_msg(16, numbers_len, *s,
+ send, UTF, TRUE);
+ if (! packed_warn) {
+ Perl_warner(aTHX_ packWARN(WARN_DIGIT), "%s", failure);
+ }
+ else {
+ *message = failure;
+ *packed_warn = packWARN(WARN_DIGIT);
+ }
+ }
}
/* Return past the '}' */
p |OP* |localize |NN OP *o|I32 lex
ApdR |I32 |looks_like_number|NN SV *const sv
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_DQUOTE_C)
-EpRX |bool |grok_bslash_x |NN char** s \
- |NN const char* const send \
- |NN UV* uv \
- |NN const char** message \
- |const bool output_warning \
- |const bool strict \
+EpRX |bool |grok_bslash_x |NN char** s \
+ |NN const char* const send \
+ |NN UV* uv \
+ |NN const char** message \
+ |NULLOK U32 * packed_warn \
+ |const bool strict \
+ |const bool allow_UV_MAX \
|const bool utf8
EpRX |bool |grok_bslash_c |const char source \
|NN U8 * result \
|NN const char** message \
|NULLOK U32 * packed_warn
-EpRX |bool |grok_bslash_o |NN char** s \
- |NN const char* const send \
- |NN UV* uv \
- |NN const char** message \
- |const bool output_warning \
- |const bool strict \
+EpRX |bool |grok_bslash_o |NN char** s \
+ |NN const char* const send \
+ |NN UV* uv \
+ |NN const char** message \
+ |NULLOK U32 * packed_warn \
+ |const bool strict \
+ |const bool allow_UV_MAX \
|const bool utf8
+EpRX |const char *|form_alien_digit_msg|const U8 which \
+ |const STRLEN valids_len \
+ |NN const char * const first_bad\
+ |NN const char * const send \
+ |const bool UTF \
+ |const bool braced
EiR |char*|form_short_octal_warning|NN const char * const s \
|const STRLEN len
EiRT |I32 |regcurly |NN const char *s
#endif
+#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_DQUOTE_C)
+EpRX |const char *|form_cp_too_large_msg|const U8 which \
+ |NULLOK const char * string \
+ |const Size_t len \
+ |const UV cp
+#endif
AMpd |UV |grok_hex |NN const char* start|NN STRLEN* len_p|NN I32* flags|NULLOK NV *result
Apd |int |grok_infnan |NN const char** sp|NN const char *send
Apd |int |grok_number |NN const char *pv|STRLEN len|NULLOK UV *valuep
#define invlist_clone(a,b) Perl_invlist_clone(aTHX_ a,b)
# endif
# if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_DQUOTE_C)
+#define form_alien_digit_msg(a,b,c,d,e,f) Perl_form_alien_digit_msg(aTHX_ a,b,c,d,e,f)
+#define form_cp_too_large_msg(a,b,c,d) Perl_form_cp_too_large_msg(aTHX_ a,b,c,d)
#define form_short_octal_warning(a,b) S_form_short_octal_warning(aTHX_ a,b)
#define grok_bslash_c(a,b,c,d) Perl_grok_bslash_c(aTHX_ a,b,c,d)
-#define grok_bslash_o(a,b,c,d,e,f,g) Perl_grok_bslash_o(aTHX_ a,b,c,d,e,f,g)
-#define grok_bslash_x(a,b,c,d,e,f,g) Perl_grok_bslash_x(aTHX_ a,b,c,d,e,f,g)
+#define grok_bslash_o(a,b,c,d,e,f,g,h) Perl_grok_bslash_o(aTHX_ a,b,c,d,e,f,g,h)
+#define grok_bslash_x(a,b,c,d,e,f,g,h) Perl_grok_bslash_x(aTHX_ a,b,c,d,e,f,g,h)
#define regcurly S_regcurly
# endif
# if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C) || defined(PERL_IN_OP_C) || defined(PERL_IN_DOOP_C)
=item *
+L<Non-hex character '%c' terminates \x early. Resolved as "%s"|perldiag/"Non-hex character '%c' terminates \x early. Resolved as "%s"">
+
+This replaces a warning that was much less specific, and which gave
+false information. This new warning parallels the similar
+already-existing one raised for C<\o{}>.
+
+=item *
+
L<message|perldiag/"message">
=back
expression pattern compilation, marking where precisely in the pattern
it occured.
+=item *
+
+L<Non-octal character '%c' terminates \o early. Resolved as "%s"|perldiag/"Non-octal character '%c' terminates \o early. Resolved as "%s"">
+
+now includes the phrase "terminates \o early", and has extra text added
+at the end, when raised during regular expression pattern compilation,
+marking where precisely in the pattern it occured. In some instances
+the text of the resolution has been clarified.
+
=back
=head1 Utility Changes
(?[ [ \xDG ] ])
(?[ [ \x{DEKA} ] ])
+=item Non-hex character '%c' terminates \x early. Resolved as "%s"
+
+(W digit) In parsing a hexadecimal numeric constant, a character was
+unexpectedly encountered that isn't hexadecimal. The resulting value
+is as indicated.
+
+Note that, within braces, every character starting with the first
+non-hexadecimal up to the ending brace is ignored.
+
=item Non-octal character in regex; marked by S<<-- HERE> in m/%s/
(F) In a regular expression, there was a non-octal character where
(?[ [ \o{1278} ] ])
-=item Non-octal character '%c'. Resolved as "%s"
+=item Non-octal character '%c' terminates \o early. Resolved as "%s"
(W digit) In parsing an octal numeric constant, a character was
unexpectedly encountered that isn't octal. The resulting value
is as indicated.
+Note that, within braces, every character starting with the first
+non-octal up to the ending brace is ignored.
+
=item "no" not allowed in expression
(F) The "no" keyword is recognized and executed at compile time, and
assert(invlist)
#endif
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_DQUOTE_C)
+PERL_CALLCONV const char * Perl_form_alien_digit_msg(pTHX_ const U8 which, const STRLEN valids_len, const char * const first_bad, const char * const send, const bool UTF, const bool braced)
+ __attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_FORM_ALIEN_DIGIT_MSG \
+ assert(first_bad); assert(send)
+
+PERL_CALLCONV const char * Perl_form_cp_too_large_msg(pTHX_ const U8 which, const char * string, const Size_t len, const UV cp)
+ __attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_FORM_CP_TOO_LARGE_MSG
+
#ifndef PERL_NO_INLINE_FUNCTIONS
PERL_STATIC_INLINE char* S_form_short_octal_warning(pTHX_ const char * const s, const STRLEN len)
__attribute__warn_unused_result__;
#define PERL_ARGS_ASSERT_GROK_BSLASH_C \
assert(result); assert(message)
-PERL_CALLCONV bool Perl_grok_bslash_o(pTHX_ char** s, const char* const send, UV* uv, const char** message, const bool output_warning, const bool strict, const bool utf8)
+PERL_CALLCONV bool Perl_grok_bslash_o(pTHX_ char** s, const char* const send, UV* uv, const char** message, U32 * packed_warn, const bool strict, const bool allow_UV_MAX, const bool utf8)
__attribute__warn_unused_result__;
#define PERL_ARGS_ASSERT_GROK_BSLASH_O \
assert(s); assert(send); assert(uv); assert(message)
-PERL_CALLCONV bool Perl_grok_bslash_x(pTHX_ char** s, const char* const send, UV* uv, const char** message, const bool output_warning, const bool strict, const bool utf8)
+PERL_CALLCONV bool Perl_grok_bslash_x(pTHX_ char** s, const char* const send, UV* uv, const char** message, U32 * packed_warn, const bool strict, const bool allow_UV_MAX, const bool utf8)
__attribute__warn_unused_result__;
#define PERL_ARGS_ASSERT_GROK_BSLASH_X \
assert(s); assert(send); assert(uv); assert(message)
p++;
break;
case 'o':
- {
- UV result;
- const char* error_msg;
-
- bool valid = grok_bslash_o(&p,
- RExC_end,
- &result,
- &error_msg,
- TO_OUTPUT_WARNINGS(p),
- (bool) RExC_strict,
- UTF);
- if (! valid) {
- RExC_parse = p; /* going to die anyway; point
- to exact spot of failure */
- vFAIL(error_msg);
- }
- UPDATE_WARNINGS_LOC(p - 1);
- ender = result;
- break;
- }
+ if (! grok_bslash_o(&p,
+ RExC_end,
+ &ender,
+ &message,
+ &packed_warn,
+ (bool) RExC_strict,
+ FALSE, /* No illegal cp's */
+ UTF))
+ {
+ RExC_parse = p; /* going to die anyway; point to
+ exact spot of failure */
+ vFAIL(message);
+ }
+
+ if (message && TO_OUTPUT_WARNINGS(p)) {
+ warn_non_literal_string(p, packed_warn, message);
+ }
+ break;
case 'x':
- {
- UV result = UV_MAX; /* initialize to erroneous
- value */
- const char* error_msg;
-
- bool valid = grok_bslash_x(&p,
- RExC_end,
- &result,
- &error_msg,
- TO_OUTPUT_WARNINGS(p),
- (bool) RExC_strict,
- UTF);
- if (! valid) {
- RExC_parse = p; /* going to die anyway; point
- to exact spot of failure */
- vFAIL(error_msg);
- }
- UPDATE_WARNINGS_LOC(p - 1);
- ender = result;
+ if (! grok_bslash_x(&p,
+ RExC_end,
+ &ender,
+ &message,
+ &packed_warn,
+ (bool) RExC_strict,
+ FALSE, /* No illegal cp's */
+ UTF))
+ {
+ RExC_parse = p; /* going to die anyway; point
+ to exact spot of failure */
+ vFAIL(message);
+ }
+
+ if (message && TO_OUTPUT_WARNINGS(p)) {
+ warn_non_literal_string(p, packed_warn, message);
+ }
#ifdef EBCDIC
- if (ender < 0x100) {
- if (RExC_recode_x_to_native) {
- ender = LATIN1_TO_NATIVE(ender);
- }
- }
+ if (ender < 0x100) {
+ if (RExC_recode_x_to_native) {
+ ender = LATIN1_TO_NATIVE(ender);
+ }
+ }
#endif
- break;
- }
+ break;
case 'c':
p++;
if (! grok_bslash_c(*p, &grok_c_char,
PERL_ARGS_ASSERT_OUTPUT_POSIX_WARNINGS;
if (! TO_OUTPUT_WARNINGS(RExC_parse)) {
+ CLEAR_POSIX_WARNINGS();
return;
}
case 'a': value = '\a'; break;
case 'o':
RExC_parse--; /* function expects to be pointed at the 'o' */
- {
- const char* error_msg;
- bool valid = grok_bslash_o(&RExC_parse,
- RExC_end,
- &value,
- &error_msg,
- TO_OUTPUT_WARNINGS(RExC_parse),
- strict,
- UTF);
- if (! valid) {
- vFAIL(error_msg);
- }
- UPDATE_WARNINGS_LOC(RExC_parse - 1);
- }
+ if (! grok_bslash_o(&RExC_parse,
+ RExC_end,
+ &value,
+ &message,
+ &packed_warn,
+ strict,
+ range, /* MAX_UV allowed for range
+ upper limit */
+ UTF))
+ {
+ vFAIL(message);
+ }
+ else if (message && TO_OUTPUT_WARNINGS(RExC_parse)) {
+ warn_non_literal_string(RExC_parse, packed_warn, message);
+ }
+
non_portable_endpoint++;
break;
case 'x':
RExC_parse--; /* function expects to be pointed at the 'x' */
- {
- const char* error_msg;
- bool valid = grok_bslash_x(&RExC_parse,
- RExC_end,
- &value,
- &error_msg,
- TO_OUTPUT_WARNINGS(RExC_parse),
- strict,
- UTF);
- if (! valid) {
- vFAIL(error_msg);
- }
- UPDATE_WARNINGS_LOC(RExC_parse - 1);
- }
+ if (! grok_bslash_x(&RExC_parse,
+ RExC_end,
+ &value,
+ &message,
+ &packed_warn,
+ strict,
+ range, /* MAX_UV allowed for range
+ upper limit */
+ UTF))
+ {
+ vFAIL(message);
+ }
+ else if (message && TO_OUTPUT_WARNINGS(RExC_parse)) {
+ warn_non_literal_string(RExC_parse, packed_warn, message);
+ }
+
non_portable_endpoint++;
break;
case 'c':
/* non-Latin1 code point implies unicode semantics. */
if (value > 255) {
+ if (value > MAX_LEGAL_CP && ( value != UV_MAX
+ || prevvalue > MAX_LEGAL_CP))
+ {
+ vFAIL(form_cp_too_large_msg(16, NULL, 0, value));
+ }
REQUIRE_UNI_RULES(flagp, 0);
if ( ! silence_non_portable
&& UNICODE_IS_PERL_EXTENDED(value)
my $a = qr/\o{1238456}\x{100}/;
my $a = qr/[\o{6548321}]\x{100}/;
EXPECT
-Non-octal character '8'. Resolved as "\o{123}" at - line 3.
-Non-octal character '8'. Resolved as "\o{654}" at - line 4.
+Non-octal character '8' terminates \o early. Resolved as "\o{123}" in regex; marked by <-- HERE in m/\o{1238456} <-- HERE \x{100}/ at - line 3.
+Non-octal character '8' terminates \o early. Resolved as "\o{654}" in regex; marked by <-- HERE in m/[\o{6548321} <-- HERE ]\x{100}/ at - line 4.
########
# regcomp.c
BEGIN {
no warnings 'digit' ;
my $a = "\o{1238456}";
EXPECT
-Non-octal character '8'. Resolved as "\o{123}" at - line 3.
+Non-octal character '8' terminates \o early. Resolved as "\o{123}" at - line 3.
########
# toke.c
use warnings;
print "aq" =~ m[^a\[a-z\]$], "H\n";
print "aq" =~ m(^a\(q\)$), "I\n";
EXPECT
-Illegal hexadecimal digit '\' ignored at - line 5.
-Illegal hexadecimal digit '\' ignored at - line 7.
-Illegal hexadecimal digit '\' ignored at - line 9.
+Non-hex character '\\' terminates \x early. Resolved as "\x00\\" in regex; marked by <-- HERE in m/^a\x <-- HERE \{61\}$/ at - line 5.
+Non-hex character '\\' terminates \x early. Resolved as "\x00\\" in regex; marked by <-- HERE in m/^a\\\x <-- HERE \{6F\}$/ at - line 7.
+Non-hex character '\\' terminates \x early. Resolved as "\x00\\" in regex; marked by <-- HERE in m/^a\\\\\x <-- HERE \{6F\}$/ at - line 9.
A
B
1C
'[\x{101}-{INFTY}]' => 'ANYOFH[0101-INFTY]',
'[\x{101}-{HIGHEST_CP}]' => 'ANYOFH[0101-HIGHEST_CP]',
'[\x{102}\x{104}]' => 'ANYOFHb[0102 0104]',
- '[\x{102}-\x{104}{INFTY}]' => 'ANYOFH[0102-0104 INFTY-INFTY]',
'[\x{102}-\x{104}{HIGHEST_CP}]' => 'ANYOFH[0102-0104 HIGHEST_CP]',
'[\x{102}-\x{104}\x{101}]' => 'ANYOFRb[0101-0104]',
'[\x{102}-\x{104}\x{101}-{INFTY}]' => 'ANYOFH[0101-INFTY]',
'[\x{102}-\x{104}\x{106}]' => 'ANYOFHb[0102-0104 0106]',
'[\x{102}-\x{104}\x{106}-{INFTY}]' => 'ANYOFH[0102-0104 0106-INFTY]',
'[\x{102}-\x{104}\x{106}-{HIGHEST_CP}]' => 'ANYOFH[0102-0104 0106-HIGHEST_CP]',
- '[\x{102}-\x{104}\x{108}-\x{10A}{INFTY}]' => 'ANYOFH[0102-0104 0108-010A INFTY-INFTY]',
'[\x{102}-\x{104}\x{108}-\x{10A}{HIGHEST_CP}]' => 'ANYOFH[0102-0104 0108-010A HIGHEST_CP]',
'[\x{102}-\x{104}\x{108}-\x{10A}\x{101}]' => 'ANYOFHb[0101-0104 0108-010A]',
'[\x{102}-\x{104}\x{108}-\x{10A}\x{101}-{INFTY}]' => 'ANYOFH[0101-INFTY]',
'[\x{106}-{INFTY}\x{107}-{INFTY}]' => 'ANYOFH[0106-INFTY]',
'[\x{106}-{INFTY}\x{107}-{HIGHEST_CP}]' => 'ANYOFH[0106-INFTY]',
'[\x{106}-{INFTY}\x{107}-\x{107}]' => 'ANYOFH[0106-INFTY]',
- '[\x{10C}-{INFTY}{INFTY}]' => 'ANYOFH[010C-INFTY]',
'[\x{10C}-{INFTY}{HIGHEST_CP}]' => 'ANYOFH[010C-INFTY]',
'[\x{10C}-{INFTY}\x{00}-{HIGHEST_CP}]' => 'SANY',
'[\x{10C}-{INFTY}\x{00}-{INFTY}]' => 'SANY',
'[\x{10C}-{INFTY}\x{101}-{INFTY}]' => 'ANYOFH[0101-INFTY]',
'[\x{10C}-{INFTY}\x{101}-{HIGHEST_CP}]' => 'ANYOFH[0101-INFTY]',
'[\x{10C}-{INFTY}\x{102}\x{104}]' => 'ANYOFH[0102 0104 010C-INFTY]',
- '[\x{10C}-{INFTY}\x{102}-\x{104}{INFTY}]' => 'ANYOFH[0102-0104 010C-INFTY]',
'[\x{10C}-{INFTY}\x{102}-\x{104}{HIGHEST_CP}]' => 'ANYOFH[0102-0104 010C-INFTY]',
'[\x{10C}-{INFTY}\x{102}-\x{104}\x{100}]' => 'ANYOFH[0100 0102-0104 010C-INFTY]',
'[\x{10C}-{INFTY}\x{102}-\x{104}\x{101}]' => 'ANYOFH[0101-0104 010C-INFTY]',
'[\x{10C}-{INFTY}\x{102}-\x{104}\x{106}]' => 'ANYOFH[0102-0104 0106 010C-INFTY]',
'[\x{10C}-{INFTY}\x{102}-\x{104}\x{106}-{INFTY}]' => 'ANYOFH[0102-0104 0106-INFTY]',
'[\x{10C}-{INFTY}\x{102}-\x{104}\x{106}-{HIGHEST_CP}]' => 'ANYOFH[0102-0104 0106-INFTY]',
- '[\x{10C}-{INFTY}\x{102}-\x{104}\x{108}-\x{10A}{INFTY}]' => 'ANYOFH[0102-0104 0108-010A 010C-INFTY]',
'[\x{10C}-{INFTY}\x{102}-\x{104}\x{108}-\x{10A}{HIGHEST_CP}]' => 'ANYOFH[0102-0104 0108-010A 010C-INFTY]',
'[\x{10C}-{INFTY}\x{102}-\x{104}\x{108}-\x{10A}\x{101}]' => 'ANYOFH[0101-0104 0108-010A 010C-INFTY]',
'[\x{10C}-{INFTY}\x{102}-\x{104}\x{108}-\x{10A}\x{101}-{INFTY}]' => 'ANYOFH[0101-INFTY]',
'm/[\xABC]/' => "",
=> 'Use \x{...} for more than two hex characters {#} m/[\xABC{#}]/',
- # XXX This is a confusing error message. The G isn't ignored; it just
- # terminates the \x. Also some messages below are missing the <-- HERE,
- # aren't all category 'regexp'. (Hence we have to turn off 'digit'
- # messages as well below)
- 'm/\xAG/' => 'Illegal hexadecimal digit \'G\' ignored',
+ # some messages below aren't all category 'regexp'. (Hence we have to
+ # turn off 'digit' messages as well below)
+ 'm/\xAG/' => 'Non-hex character \'G\' terminates \x early. Resolved as "\x0AG" {#} m/\xA{#}G/',
=> 'Non-hex character {#} m/\xAG{#}/',
- 'm/[\xAG]/' => 'Illegal hexadecimal digit \'G\' ignored',
+ 'm/[\xAG]/' => 'Non-hex character \'G\' terminates \x early. Resolved as "\x0AG" {#} m/[\xA{#}G]/',
=> 'Non-hex character {#} m/[\xAG{#}]/',
- 'm/\o{789}/' => 'Non-octal character \'8\'. Resolved as "\o{7}"',
+ 'm/\o{789}/' => 'Non-octal character \'8\' terminates \o early. Resolved as "\o{007}" {#} m/\o{789}{#}/',
=> 'Non-octal character {#} m/\o{78{#}9}/',
- 'm/[\o{789}]/' => 'Non-octal character \'8\'. Resolved as "\o{7}"',
+ 'm/[\o{789}]/' => 'Non-octal character \'8\' terminates \o early. Resolved as "\o{007}" {#} m/[\o{789}{#}]/',
=> 'Non-octal character {#} m/[\o{78{#}9}]/',
'm/\x{}/' => "",
=> 'Empty \x{} {#} m/\x{}{#}/',
'm/[\x{}]/' => "",
=> 'Empty \x{} {#} m/[\x{}{#}]/',
- 'm/\x{ABCDEFG}/' => 'Illegal hexadecimal digit \'G\' ignored',
+ 'm/\x{ABCDEFG}/' => 'Non-hex character \'G\' terminates \x early. Resolved as "\x{ABCDEF}" {#} m/\x{ABCDEFG}{#}/',
=> 'Non-hex character {#} m/\x{ABCDEFG{#}}/',
- 'm/[\x{ABCDEFG}]/' => 'Illegal hexadecimal digit \'G\' ignored',
+ 'm/[\x{ABCDEFG}]/' => 'Non-hex character \'G\' terminates \x early. Resolved as "\x{ABCDEF}" {#} m/[\x{ABCDEFG}{#}]/',
=> 'Non-hex character {#} m/[\x{ABCDEFG{#}}]/',
"m'[\\y]\\x{100}'" => 'Unrecognized escape \y in character class passed through {#} m/[\y{#}]\x{100}/',
=> 'Unrecognized escape \y in character class {#} m/[\y{#}]\x{100}/',
{
const char* error;
- bool valid = grok_bslash_o(&s, send,
+ if (! grok_bslash_o(&s, send,
&uv, &error,
- TRUE, /* Output warning */
+ NULL,
FALSE, /* Not strict */
- UTF);
- if (! valid) {
+ FALSE, /* No illegal cp's */
+ UTF))
+ {
yyerror(error);
uv = 0; /* drop through to ensure range ends are set */
}
{
const char* error;
- bool valid = grok_bslash_x(&s, send,
+ if (! grok_bslash_x(&s, send,
&uv, &error,
- TRUE, /* Output warning */
+ NULL,
FALSE, /* Not strict */
- UTF);
- if (! valid) {
+ FALSE, /* No illegal cp's */
+ UTF))
+ {
yyerror(error);
uv = 0; /* drop through to ensure range ends are set */
}