case '[':
{
char * const oregcomp_parse = ++RExC_parse;
- ret = regclass(pRExC_state, flagp,depth+1);
+ ret = regclass(pRExC_state, flagp,depth+1,
+ FALSE /* means parse the whole char class */ );
if (*RExC_parse != ']') {
RExC_parse = oregcomp_parse;
vFAIL("Unmatched [");
case 'p':
case 'P':
{
- char* const oldregxend = RExC_end;
#ifdef DEBUGGING
char* parse_start = RExC_parse - 2;
#endif
- if (RExC_parse[1] == '{') {
- /* a lovely hack--pretend we saw [\pX] instead */
- RExC_end = strchr(RExC_parse, '}');
- if (!RExC_end) {
- const U8 c = (U8)*RExC_parse;
- RExC_parse += 2;
- RExC_end = oldregxend;
- vFAIL2("Missing right brace on \\%c{}", c);
- }
- RExC_end++;
- }
- else {
- RExC_end = RExC_parse + 2;
- if (RExC_end > oldregxend)
- RExC_end = oldregxend;
- }
RExC_parse--;
- ret = regclass(pRExC_state, flagp,depth+1);
+ ret = regclass(pRExC_state, flagp,depth+1,
+ TRUE /* means just parse this element */ );
- RExC_end = oldregxend;
RExC_parse--;
Set_Node_Offset(ret, parse_start + 2);
break;
case 'o':
{
- STRLEN brace_len = len;
UV result;
const char* error_msg;
- bool valid = grok_bslash_o(p,
+ bool valid = grok_bslash_o(&p,
&result,
- &brace_len,
&error_msg,
- 1);
- p += brace_len;
+ TRUE, /* output warnings */
+ FALSE, /* not strict */
+ UTF);
if (! valid) {
RExC_parse = p; /* going to die anyway; point
to exact spot of failure */
vFAIL(error_msg);
}
- else
- {
- ender = result;
- }
+ ender = result;
if (PL_encoding && ender < 0x100) {
goto recode_encoding;
}
}
case 'x':
{
- STRLEN brace_len = len;
UV result;
const char* error_msg;
- bool valid = grok_bslash_x(p,
+ bool valid = grok_bslash_x(&p,
&result,
- &brace_len,
&error_msg,
- 1);
- p += brace_len;
+ TRUE, /* output warnings */
+ FALSE, /* not strict */
+ UTF);
if (! valid) {
RExC_parse = p; /* going to die anyway; point
to exact spot of failure */
vFAIL(error_msg);
}
- else {
- ender = result;
- }
+ ender = result;
+
if (PL_encoding && ender < 0x100) {
goto recode_encoding;
}
#define HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION (SvCUR(listsv) != initial_listsv_len)
STATIC regnode *
-S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
+S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, const bool stop_at_1)
{
/* parse a bracketed class specification. Most of these will produce an ANYOF node;
* but something like [a] will produce an EXACT node; [aA], an EXACTFish
AV * multi_char_matches = NULL; /* Code points that fold to more than one
character; used under /i */
UV n;
+ char * stop_ptr = RExC_end; /* where to stop parsing */
/* Unicode properties are stored in a swash; this holds the current one
* being parsed. If this swash is the only above-latin1 component of the
/* Assume we are going to generate an ANYOF node. */
ret = reganode(pRExC_state, ANYOF, 0);
- if (!SIZE_ONLY) {
- ANYOF_FLAGS(ret) = 0;
- }
-
if (UCHARAT(RExC_parse) == '^') { /* Complement of range. */
RExC_parse++;
invert = TRUE;
listsv = &PL_sv_undef; /* For code scanners: listsv always non-NULL. */
}
else {
+ ANYOF_FLAGS(ret) = 0;
+
RExC_emit += ANYOF_SKIP;
if (LOC) {
ANYOF_FLAGS(ret) |= ANYOF_LOCALE;
}
}
+ /* If the caller wants us to just parse a single element, accomplish this
+ * by faking the loop ending condition */
+ if (stop_at_1 && RExC_end > RExC_parse) {
+ stop_ptr = RExC_parse + 1;
+ }
+
/* allow 1st char to be ] (allowing it to be - is dealt with later) */
if (UCHARAT(RExC_parse) == ']')
goto charclassloop;
parseit:
- while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != ']') {
+ while (RExC_parse < stop_ptr && UCHARAT(RExC_parse) != ']') {
charclassloop:
RExC_parse--; /* function expects to be pointed at the 'o' */
{
const char* error_msg;
- bool valid = grok_bslash_o(RExC_parse,
+ bool valid = grok_bslash_o(&RExC_parse,
&value,
- &numlen,
&error_msg,
- SIZE_ONLY);
- RExC_parse += numlen;
+ SIZE_ONLY,
+ FALSE, /* Not strict */
+ UTF);
if (! valid) {
vFAIL(error_msg);
}
RExC_parse--; /* function expects to be pointed at the 'x' */
{
const char* error_msg;
- bool valid = grok_bslash_x(RExC_parse,
+ bool valid = grok_bslash_x(&RExC_parse,
&value,
- &numlen,
&error_msg,
- 1);
- RExC_parse += numlen;
+ TRUE, /* Output warnings */
+ FALSE, /* Not strict */
+ UTF);
if (! valid) {
vFAIL(error_msg);
}