STATIC bool
S_grok_bslash_o(pTHX_ char **s, UV *uv, const char** error_msg,
- const bool output_warning)
+ const bool output_warning, const bool strict,
+ const bool UTF)
{
/* Documentation to be supplied when interface nailed down finally
* function succeeds
* output_warning says whether to output any warning messages, or suppress
* them
+ * strict is true if this should fail instead of warn if there are
+ * non-octal digits within the braces
+ * UTF is true iff the string *s is encoded in UTF-8.
*/
char* e;
STRLEN numbers_len;
/* Note that if has non-octal, will ignore everything starting with that up
* to the '}' */
- if (output_warning && numbers_len != (STRLEN) (e - *s)) {
+ if (numbers_len != (STRLEN) (e - *s)) {
+ if (strict) {
+ *s += numbers_len;
+ *s += (UTF) ? UTF8SKIP(*s) : (STRLEN) 1;
+ *error_msg = "Non-octal character";
+ return FALSE;
+ }
+ else if (output_warning) {
Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
/* diag_listed_as: Non-octal character '%c'. Resolved as "%s" */
"Non-octal character '%c'. Resolved as \"\\o{%.*s}\"",
*(*s + numbers_len),
(int) numbers_len,
*s);
+ }
}
/* Return past the '}' */
PERL_STATIC_INLINE bool
S_grok_bslash_x(pTHX_ char **s, UV *uv, const char** error_msg,
- const bool output_warning)
+ const bool output_warning, const bool strict,
+ const bool UTF)
{
/* Documentation to be supplied when interface nailed down finally
* function succeeds
* output_warning says whether to output any warning messages, or suppress
* them
+ * strict is true if anything out of the ordinary should cause this to
+ * fail instead of warn or be silent. For example, it requires
+ * exactly 2 digits following the \x (when there are no braces).
+ * 3 digits could be a mistake, so is forbidden in this mode.
+ * UTF is true iff the string *s is encoded in UTF-8.
*/
char* e;
STRLEN numbers_len;
- I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
- | PERL_SCAN_DISALLOW_PREFIX;
+ I32 flags = 0;
PERL_ARGS_ASSERT_GROK_BSLASH_X;
assert(**s == 'x');
(*s)++;
+ if (strict) {
+ flags |= PERL_SCAN_SILENT_ILLDIGIT;
+ }
+
if (**s != '{') {
- I32 flags = PERL_SCAN_DISALLOW_PREFIX;
- STRLEN len = 2;
+ STRLEN len = (strict) ? 3 : 2;
+
+ flags |= PERL_SCAN_DISALLOW_PREFIX;
*uv = grok_hex(*s, &len, &flags, NULL);
*s += len;
+ if (strict && len != 2) {
+ if (len < 2) {
+ *s += (UTF) ? UTF8SKIP(*s) : 1;
+ *error_msg = "Non-hex character";
+ }
+ else {
+ *error_msg = "Use \\x{...} for more than two hex characters";
+ }
+ return FALSE;
+ }
return TRUE;
}
(*s)++; /* Point to expected first digit (could be first byte of utf8
sequence if not a digit) */
numbers_len = e - *s;
+ if (numbers_len == 0) {
+ if (strict) {
+ (*s)++; /* Move past the } */
+ *error_msg = "Number with no digits";
+ return FALSE;
+ }
+ return TRUE;
+ }
+
+ flags |= PERL_SCAN_ALLOW_UNDERSCORES|PERL_SCAN_DISALLOW_PREFIX;
+
*uv = grok_hex(*s, &numbers_len, &flags, NULL);
/* Note that if has non-hex, will ignore everything starting with that up
* to the '}' */
+ if (strict && numbers_len != (STRLEN) (e - *s)) {
+ *s += numbers_len;
+ *s += (UTF) ? UTF8SKIP(*s) : 1;
+ *error_msg = "Non-hex character";
+ return FALSE;
+ }
+
/* Return past the '}' */
*s = e + 1;
EMsR |char |grok_bslash_c |const char source|const bool utf8|const bool output_warning
EMsR |bool |grok_bslash_o |NN char** s|NN UV* uv \
|NN const char** error_msg \
- |const bool output_warning
+ |const bool output_warning \
+ |const bool strict|const bool utf8
EMiR |bool |grok_bslash_x |NN char** s|NN UV* uv \
|NN const char** error_msg \
- |const bool output_warning
+ |const bool output_warning \
+ |const bool strict|const bool utf8
#endif
Apd |UV |grok_hex |NN const char* start|NN STRLEN* len_p|NN I32* flags|NULLOK NV *result
Apd |int |grok_number |NN const char *pv|STRLEN len|NULLOK UV *valuep
# endif
# if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C)
#define grok_bslash_c(a,b,c) S_grok_bslash_c(aTHX_ a,b,c)
-#define grok_bslash_o(a,b,c,d) S_grok_bslash_o(aTHX_ a,b,c,d)
-#define grok_bslash_x(a,b,c,d) S_grok_bslash_x(aTHX_ a,b,c,d)
+#define grok_bslash_o(a,b,c,d,e,f) S_grok_bslash_o(aTHX_ a,b,c,d,e,f)
+#define grok_bslash_x(a,b,c,d,e,f) S_grok_bslash_x(aTHX_ a,b,c,d,e,f)
#define regcurly(a) S_regcurly(aTHX_ a)
# endif
# if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C)
STATIC char S_grok_bslash_c(pTHX_ const char source, const bool utf8, const bool output_warning)
__attribute__warn_unused_result__;
-STATIC bool S_grok_bslash_o(pTHX_ char** s, UV* uv, const char** error_msg, const bool output_warning)
+STATIC bool S_grok_bslash_o(pTHX_ char** s, UV* uv, const char** error_msg, const bool output_warning, const bool strict, const bool utf8)
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_2)
#define PERL_ARGS_ASSERT_GROK_BSLASH_O \
assert(s); assert(uv); assert(error_msg)
-PERL_STATIC_INLINE bool S_grok_bslash_x(pTHX_ char** s, UV* uv, const char** error_msg, const bool output_warning)
+PERL_STATIC_INLINE bool S_grok_bslash_x(pTHX_ char** s, UV* uv, const char** error_msg, const bool output_warning, const bool strict, const bool utf8)
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_2)
bool valid = grok_bslash_o(&p,
&result,
&error_msg,
- TRUE); /* out warnings */
+ TRUE, /* out warnings */
+ FALSE, /* not strict */
+ UTF);
if (! valid) {
RExC_parse = p; /* going to die anyway; point
to exact spot of failure */
bool valid = grok_bslash_x(&p,
&result,
&error_msg,
- TRUE); /* out warnings */
+ TRUE, /* out warnings */
+ FALSE, /* not strict */
+ UTF);
if (! valid) {
RExC_parse = p; /* going to die anyway; point
to exact spot of failure */
bool valid = grok_bslash_o(&RExC_parse,
&value,
&error_msg,
- SIZE_ONLY);
+ SIZE_ONLY,
+ FALSE, /* Not strict */
+ UTF);
if (! valid) {
vFAIL(error_msg);
}
bool valid = grok_bslash_x(&RExC_parse,
&value,
&error_msg,
- 1);
+ TRUE, /* Output warnings */
+ FALSE, /* Not strict */
+ UTF);
if (! valid) {
vFAIL(error_msg);
}
Operation "%s" returns its argument for UTF-16 surrogate U+%X
Unicode surrogate U+%X is illegal in UTF-8
UTF-16 surrogate U+%X
+Non-octal character in regex; marked by <-- HERE in m/%s/
+Non-hex character in regex; marked by <-- HERE in m/%s/
+Use \\x{...} for more than two hex characters in regex; marked by <-- HERE in m/%s/
const char* error;
bool valid = grok_bslash_o(&s, &uv, &error,
- TRUE); /* Output warning */
+ TRUE, /* Output warning */
+ FALSE, /* Not strict */
+ UTF);
if (! valid) {
yyerror(error);
continue;
const char* error;
bool valid = grok_bslash_x(&s, &uv, &error,
- TRUE); /* Output warning */
+ TRUE, /* Output warning */
+ FALSE, /* Not strict */
+ UTF);
if (! valid) {
yyerror(error);
continue;