This is a live mirror of the Perl 5 development repository, currently hosted at https://github.com/perl/perl5
regcomp.c: Save a test by moving a line of code
[perl5.git] / regcomp.c
index 496fb8f..c699996 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -10099,7 +10099,8 @@ tryagain:
     case '[':
     {
        char * const oregcomp_parse = ++RExC_parse;
-        ret = regclass(pRExC_state, flagp,depth+1);
+        ret = regclass(pRExC_state, flagp,depth+1,
+                       FALSE /* means parse the whole char class */ );
        if (*RExC_parse != ']') {
            RExC_parse = oregcomp_parse;
            vFAIL("Unmatched [");
@@ -10287,32 +10288,15 @@ tryagain:
        case 'p':
        case 'P':
            {
-               char* const oldregxend = RExC_end;
 #ifdef DEBUGGING
                char* parse_start = RExC_parse - 2;
 #endif
 
-               if (RExC_parse[1] == '{') {
-                 /* a lovely hack--pretend we saw [\pX] instead */
-                   RExC_end = strchr(RExC_parse, '}');
-                   if (!RExC_end) {
-                       const U8 c = (U8)*RExC_parse;
-                       RExC_parse += 2;
-                       RExC_end = oldregxend;
-                       vFAIL2("Missing right brace on \\%c{}", c);
-                   }
-                   RExC_end++;
-               }
-               else {
-                   RExC_end = RExC_parse + 2;
-                   if (RExC_end > oldregxend)
-                       RExC_end = oldregxend;
-               }
                RExC_parse--;
 
-                ret = regclass(pRExC_state, flagp,depth+1);
+                ret = regclass(pRExC_state, flagp,depth+1,
+                               TRUE /* means just parse this element */ );
 
-               RExC_end = oldregxend;
                RExC_parse--;
 
                Set_Node_Offset(ret, parse_start + 2);
@@ -10628,25 +10612,21 @@ tryagain:
                        break;
                    case 'o':
                        {
-                           STRLEN brace_len = len;
                            UV result;
                            const char* error_msg;
 
-                           bool valid = grok_bslash_o(p,
+                           bool valid = grok_bslash_o(&p,
                                                       &result,
-                                                      &brace_len,
                                                       &error_msg,
-                                                      1);
-                           p += brace_len;
+                                                      TRUE, /* out warnings */
+                                                       FALSE, /* not strict */
+                                                       UTF);
                            if (! valid) {
                                RExC_parse = p; /* going to die anyway; point
                                                   to exact spot of failure */
                                vFAIL(error_msg);
                            }
-                           else
-                           {
-                               ender = result;
-                           }
+                            ender = result;
                            if (PL_encoding && ender < 0x100) {
                                goto recode_encoding;
                            }
@@ -10657,24 +10637,22 @@ tryagain:
                        }
                    case 'x':
                        {
-                           STRLEN brace_len = len;
                            UV result;
                            const char* error_msg;
 
-                           bool valid = grok_bslash_x(p,
+                           bool valid = grok_bslash_x(&p,
                                                       &result,
-                                                      &brace_len,
                                                       &error_msg,
-                                                      1);
-                           p += brace_len;
+                                                      TRUE, /* out warnings */
+                                                       FALSE, /* not strict */
+                                                       UTF);
                            if (! valid) {
                                RExC_parse = p; /* going to die anyway; point
                                                   to exact spot of failure */
                                vFAIL(error_msg);
                            }
-                           else {
-                               ender = result;
-                           }
+                            ender = result;
+
                            if (PL_encoding && ender < 0x100) {
                                goto recode_encoding;
                            }
@@ -11239,7 +11217,7 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value, SV *free_me)
 #define HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION (SvCUR(listsv) != initial_listsv_len)
 
 STATIC regnode *
-S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
+S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, const bool stop_at_1)
 {
     /* parse a bracketed class specification.  Most of these will produce an ANYOF node;
      * but something like [a] will produce an EXACT node; [aA], an EXACTFish
@@ -11283,6 +11261,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
     AV * multi_char_matches = NULL; /* Code points that fold to more than one
                                        character; used under /i */
     UV n;
+    char * stop_ptr = RExC_end;    /* where to stop parsing */
 
     /* Unicode properties are stored in a swash; this holds the current one
      * being parsed.  If this swash is the only above-latin1 component of the
@@ -11332,10 +11311,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
     /* Assume we are going to generate an ANYOF node. */
     ret = reganode(pRExC_state, ANYOF, 0);
 
-    if (!SIZE_ONLY) {
-       ANYOF_FLAGS(ret) = 0;
-    }
-
     if (UCHARAT(RExC_parse) == '^') {  /* Complement of range. */
        RExC_parse++;
         invert = TRUE;
@@ -11347,6 +11322,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
        listsv = &PL_sv_undef; /* For code scanners: listsv always non-NULL. */
     }
     else {
+        ANYOF_FLAGS(ret) = 0;
+
        RExC_emit += ANYOF_SKIP;
        if (LOC) {
            ANYOF_FLAGS(ret) |= ANYOF_LOCALE;
@@ -11375,12 +11352,18 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
        }
     }
 
+    /* If the caller wants us to just parse a single element, accomplish this
+     * by faking the loop ending condition */
+    if (stop_at_1 && RExC_end > RExC_parse) {
+        stop_ptr = RExC_parse + 1;
+    }
+
     /* allow 1st char to be ] (allowing it to be - is dealt with later) */
     if (UCHARAT(RExC_parse) == ']')
        goto charclassloop;
 
 parseit:
-    while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != ']') {
+    while (RExC_parse < stop_ptr && UCHARAT(RExC_parse) != ']') {
 
     charclassloop:
 
@@ -11580,12 +11563,12 @@ parseit:
                RExC_parse--;   /* function expects to be pointed at the 'o' */
                {
                    const char* error_msg;
-                   bool valid = grok_bslash_o(RExC_parse,
+                   bool valid = grok_bslash_o(&RExC_parse,
                                               &value,
-                                              &numlen,
                                               &error_msg,
-                                              SIZE_ONLY);
-                   RExC_parse += numlen;
+                                              SIZE_ONLY,
+                                               FALSE, /* Not strict */
+                                               UTF);
                    if (! valid) {
                        vFAIL(error_msg);
                    }
@@ -11598,12 +11581,12 @@ parseit:
                RExC_parse--;   /* function expects to be pointed at the 'x' */
                {
                    const char* error_msg;
-                   bool valid = grok_bslash_x(RExC_parse,
+                   bool valid = grok_bslash_x(&RExC_parse,
                                               &value,
-                                              &numlen,
                                               &error_msg,
-                                              1);
-                   RExC_parse += numlen;
+                                              TRUE, /* Output warnings */
+                                               FALSE, /* Not strict */
+                                               UTF);
                    if (! valid) {
                        vFAIL(error_msg);
                    }