This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcomp.c: regex start class for sharp s
author Karl Williamson <public@khwilliamson.com>
Fri, 23 Dec 2011 19:24:09 +0000 (12:24 -0700)
committer Karl Williamson <public@khwilliamson.com>
Thu, 19 Jan 2012 18:58:18 +0000 (11:58 -0700)
Under most folding types, the optimizer start class should include all
of s, S, and the sharp s (\xdf) if it includes any of them.  The code
was neglecting the sharp s.  This is currently not relevant, as there is
special handling of the sharp s elsewhere in regcomp.c.  But this is a
step to changing that special handling to fix some bugs.

regcomp.c

index dfdbf7b..f7bb108 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -3318,6 +3318,19 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         * the full latin1 fold.  (Can't do this for locale,
                         * because not known until runtime */
                        ANYOF_BITMAP_SET(data->start_class, PL_fold_latin1[uc]);
+
+                       /* All folds except under /iaa that include s, S, and
+                        * sharp_s also may include the others */
+                       if (OP(scan) != EXACTFA) {
+                           if (uc == 's' || uc == 'S') {
+                               ANYOF_BITMAP_SET(data->start_class,
+                                                LATIN_SMALL_LETTER_SHARP_S);
+                           }
+                           else if (uc == LATIN_SMALL_LETTER_SHARP_S) {
+                               ANYOF_BITMAP_SET(data->start_class, 's');
+                               ANYOF_BITMAP_SET(data->start_class, 'S');
+                           }
+                       }
                    }
                }
                else if (uc >= 0x100) {
@@ -3342,6 +3355,19 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                              * run-time */
                             ANYOF_BITMAP_SET(data->start_class,
                                             PL_fold_latin1[uc]);
+
+                           /* All folds except under /iaa that include s, S,
+                            * and sharp_s also may include the others */
+                           if (OP(scan) != EXACTFA) {
+                               if (uc == 's' || uc == 'S') {
+                                   ANYOF_BITMAP_SET(data->start_class,
+                                                  LATIN_SMALL_LETTER_SHARP_S);
+                               }
+                               else if (uc == LATIN_SMALL_LETTER_SHARP_S) {
+                                   ANYOF_BITMAP_SET(data->start_class, 's');
+                                   ANYOF_BITMAP_SET(data->start_class, 'S');
+                               }
+                           }
                         }
                    }
                    data->start_class->flags &= ~ANYOF_EOS;