* additionally will populate the node's STRING with <code_point>, if <len>
* is 0. In both cases <*flagp> is appropriately set
*
- * It knows that under FOLD, UTF characters and the Latin Sharp S must be
- * folded (the latter only when the rules indicate it can match 'ss') */
+ * It knows that under FOLD, the Latin Sharp S and UTF characters above
+ * 255, must be folded (the former only when the rules indicate it can
+ * match 'ss') */
bool len_passed_in = cBOOL(len != 0);
U8 character[UTF8_MAXBYTES_CASE+1];
if (! len_passed_in) {
if (UTF) {
- if (FOLD) {
- to_uni_fold(NATIVE_TO_UNI(code_point), character, &len);
+ if (FOLD && (! LOC || code_point > 255)) {
+ _to_uni_fold_flags(NATIVE_TO_UNI(code_point),
+ character,
+ &len,
+ FOLD_FLAGS_FULL | ((LOC)
+ ? FOLD_FLAGS_LOCALE
+ : (ASCII_FOLD_RESTRICTED)
+ ? FOLD_FLAGS_NOMIX_ASCII
+ : 0));
}
else {
uvchr_to_utf8( character, code_point);
foreach my $bracketed (0, 1) { # Put rhs in [...], or not
next if $bracketed && @pattern != 1; # bracketed makes these
# or's instead of a sequence
+ foreach my $optimize_bracketed (0, 1) {
+ next if $optimize_bracketed && ! $bracketed;
foreach my $inverted (0,1) {
next if $inverted && ! $bracketed; # inversion only valid in [^...]
next if $inverted && @target != 1; # [perl #89750] multi-char
$rhs .= $rhs_char;
# Add a character to the class, so class doesn't get
- # optimized out
- $rhs .= '_]' if $bracketed;
+ # optimized out, unless we are testing that optimization
+ $rhs .= '_' if $optimize_bracketed;
+ $rhs .= ']' if $bracketed;
}
# Add one of: no capturing parens
}
}
}
+ }
}
}
unless($list_all_tests) {