- ANYOF_FLAGS(ret) = 0;
- }
- return ret;
-}
-
-STATIC regnode *
-S_regclassutf8(pTHX)
-{
- dTHR;
- register char *e;
- register U32 value;
- register U32 lastvalue = OOB_UTF8;
- register I32 range = 0;
- register regnode *ret;
- I32 numlen;
- I32 n;
- SV *listsv;
- U8 flags = 0;
- I32 namedclass;
- char *rangebegin;
-
- if (*PL_regcomp_parse == '^') { /* Complement of range. */
- PL_regnaughty++;
- PL_regcomp_parse++;
- if (!SIZE_ONLY)
- flags |= ANYOF_INVERT;
- }
- if (!SIZE_ONLY) {
- if (FOLD)
- flags |= ANYOF_FOLD;
- if (LOC)
- flags |= ANYOF_LOCALE;
- listsv = newSVpvn("# comment\n",10);
- }
-
- if (!SIZE_ONLY && ckWARN(WARN_REGEXP))
- checkposixcc();
-
- if (*PL_regcomp_parse == ']' || *PL_regcomp_parse == '-')
- goto skipcond; /* allow 1st char to be ] or - */
-
- while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != ']') {
- skipcond:
- namedclass = OOB_NAMEDCLASS;
- if (!range)
- rangebegin = PL_regcomp_parse;
- value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen);
- PL_regcomp_parse += numlen;
- if (value == '[')
- namedclass = regpposixcc(value);
- else if (value == '\\') {
- value = (U32)utf8_to_uv((U8*)PL_regcomp_parse, &numlen);
- PL_regcomp_parse += numlen;
- /* Some compilers cannot handle switching on 64-bit integer
- * values, therefore value cannot be an UV. Yes, this will
- * be a problem later if we want switch on Unicode. --jhi */
- switch (value) {
- case 'w': namedclass = ANYOF_ALNUM; break;
- case 'W': namedclass = ANYOF_NALNUM; break;
- case 's': namedclass = ANYOF_SPACE; break;
- case 'S': namedclass = ANYOF_NSPACE; break;
- case 'd': namedclass = ANYOF_DIGIT; break;
- case 'D': namedclass = ANYOF_NDIGIT; break;
- case 'p':
- case 'P':
- if (*PL_regcomp_parse == '{') {
- e = strchr(PL_regcomp_parse++, '}');
- if (!e)
- vFAIL("Missing right brace on \\p{}");
- n = e - PL_regcomp_parse;
- }
- else {
- e = PL_regcomp_parse;
- n = 1;
- }
- if (!SIZE_ONLY) {
- if (value == 'p')
- Perl_sv_catpvf(aTHX_ listsv,
- "+utf8::%.*s\n", (int)n, PL_regcomp_parse);
- else
- Perl_sv_catpvf(aTHX_ listsv,
- "!utf8::%.*s\n", (int)n, PL_regcomp_parse);
- }
- PL_regcomp_parse = e + 1;
- lastvalue = OOB_UTF8;
- continue;
- case 'n': value = '\n'; break;
- case 'r': value = '\r'; break;
- case 't': value = '\t'; break;
- case 'f': value = '\f'; break;
- case 'b': value = '\b'; break;
-#ifdef ASCIIish
- case 'e': value = '\033'; break;
- case 'a': value = '\007'; break;
-#else
- case 'e': value = '\047'; break;
- case 'a': value = '\057'; break;
-#endif
- case 'x':
- if (*PL_regcomp_parse == '{') {
- e = strchr(PL_regcomp_parse++, '}');
- if (!e)
- vFAIL("Missing right brace on \\x{}");
- numlen = 1; /* allow underscores */
- value = (UV)scan_hex(PL_regcomp_parse,
- e - PL_regcomp_parse,
- &numlen);
- PL_regcomp_parse = e + 1;
- }
- else {
- numlen = 0; /* disallow underscores */
- value = (UV)scan_hex(PL_regcomp_parse, 2, &numlen);
- PL_regcomp_parse += numlen;
- }
- break;
- case 'c':
- value = UCHARAT(PL_regcomp_parse++);
- value = toCTRL(value);
- break;
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- numlen = 0; /* disallow underscores */
- value = (UV)scan_oct(--PL_regcomp_parse, 3, &numlen);
- PL_regcomp_parse += numlen;
- break;
- default:
- if (!SIZE_ONLY && ckWARN(WARN_REGEXP) && isALPHA(value))
- vWARN2(PL_regcomp_parse,
- "Unrecognized escape \\%c in character class passed through",
- (int)value);
- break;
- }
- }
- if (namedclass > OOB_NAMEDCLASS) {
- if (range) { /* a-\d, a-[:digit:] */
- if (!SIZE_ONLY) {
- if (ckWARN(WARN_REGEXP))
- vWARN4(PL_regcomp_parse,
- "False [] range \"%*.*s\"",
- PL_regcomp_parse - rangebegin,
- PL_regcomp_parse - rangebegin,
- rangebegin);
- Perl_sv_catpvf(aTHX_ listsv,
- /* 0x002D is Unicode for '-' */
- "%04"UVxf"\n002D\n", (UV)lastvalue);
- }
- range = 0;
- }
- if (!SIZE_ONLY) {
- switch (namedclass) {
- case ANYOF_ALNUM:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n"); break;
- case ANYOF_NALNUM:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n"); break;
- case ANYOF_ALNUMC:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n"); break;
- case ANYOF_NALNUMC:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n"); break;
- case ANYOF_ALPHA:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n"); break;
- case ANYOF_NALPHA:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n"); break;
- case ANYOF_ASCII:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n"); break;
- case ANYOF_NASCII:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n"); break;
- case ANYOF_CNTRL:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n"); break;
- case ANYOF_NCNTRL:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n"); break;
- case ANYOF_GRAPH:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n"); break;
- case ANYOF_NGRAPH:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n"); break;
- case ANYOF_DIGIT:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n"); break;
- case ANYOF_NDIGIT:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n"); break;
- case ANYOF_LOWER:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n"); break;
- case ANYOF_NLOWER:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n"); break;
- case ANYOF_PRINT:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n"); break;
- case ANYOF_NPRINT:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n"); break;
- case ANYOF_PUNCT:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n"); break;
- case ANYOF_NPUNCT:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n"); break;
- case ANYOF_SPACE:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n"); break;
- case ANYOF_NSPACE:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n"); break;
- case ANYOF_UPPER:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n"); break;
- case ANYOF_NUPPER:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n"); break;
- case ANYOF_XDIGIT:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n"); break;
- case ANYOF_NXDIGIT:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n"); break;
- }
- continue;
- }
- }
- if (range) {
- if (lastvalue > value) { /* b-a */
- Simple_vFAIL4("invalid [] range \"%*.*s\"",
- PL_regcomp_parse - rangebegin,
- PL_regcomp_parse - rangebegin,
- rangebegin);
- }
- range = 0;
- }
- else {
- lastvalue = value;
- if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend &&
- PL_regcomp_parse[1] != ']') {
- PL_regcomp_parse++;
- if (namedclass > OOB_NAMEDCLASS) { /* \w-, [:word:]- */
- if (ckWARN(WARN_REGEXP))
- vWARN4(PL_regcomp_parse,
- "False [] range \"%*.*s\"",
- PL_regcomp_parse - rangebegin,
- PL_regcomp_parse - rangebegin,
- rangebegin);
- if (!SIZE_ONLY)
- Perl_sv_catpvf(aTHX_ listsv,
- /* 0x002D is Unicode for '-' */
- "002D\n");
- } else
- range = 1;
- continue; /* do it next time */
- }
- }
- /* now is the next time */
- if (!SIZE_ONLY)
- Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
- (UV)lastvalue, (UV)value);
- range = 0;