perlrecharclass.pod: Update for /a

[perl5.git] / handy.h
diff --git a/handy.h b/handy.h

index d966bfe..b12104d 100644 (file)
--- a/handy.h
+++ b/handy.h
@@ -208,19 +208,6 @@ typedef U64TYPE U64;
  #   endif
  #endif
  
-/* HMB H.Merijn Brand - a placeholder for preparing Configure patches:
- * 
- * USE_DTRACE  HAS_PSEUDOFORK  HAS_TIMEGM      LOCALTIME_R_NEEDS_TZSET 
- * GMTIME_MAX  GMTIME_MIN      LOCALTIME_MAX   LOCALTIME_MIN
- * HAS_CTIME64 HAS_LOCALTIME64 HAS_GMTIME64    HAS_DIFFTIME64
- * HAS_MKTIME64        HAS_ASCTIME64   HAS_GETADDRINFO HAS_GETNAMEINFO
- * HAS_INETNTOP        HAS_INETPTON    CHARBITS        HAS_PRCTL
- * Not (yet) used at top level, but mention them for metaconfig
- */
-
-/* Mention I8SIZE, U8SIZE, I16SIZE, U16SIZE, I32SIZE, U32SIZE,
-   I64SIZE, and U64SIZE here so that metaconfig pulls them in. */
-
  #if defined(UINT8_MAX) && defined(INT16_MAX) && defined(INT32_MAX)
  
  /* I8_MAX and I8_MIN constants are not defined, as I8 is an ambiguous type.
@@ -609,6 +596,7 @@ patched there.  The file as of this writing is cpan/Devel-PPPort/parts/inc/misc
  #   define isUPPER_A(c)  cBOOL(FITS_IN_8_BITS(c) && (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_UPPER_A))
  #   define isWORDCHAR_A(c) cBOOL(FITS_IN_8_BITS(c) && (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_WORDCHAR_A))
  #   define isXDIGIT_A(c)  cBOOL(FITS_IN_8_BITS(c) && (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_XDIGIT_A))
+#   define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_NONLATIN1_FOLD))
  #else   /* No perl.h. */
  #   define isOCTAL_A(c)  ((c) >= '0' && (c) <= '9')
  #   ifdef EBCDIC
@@ -849,10 +837,16 @@ patched there.  The file as of this writing is cpan/Devel-PPPort/parts/inc/misc
  #define isBLANK_LC_uni(c)      isBLANK(c) /* could be wrong */
  
  #define isALNUM_utf8(p)                is_utf8_alnum(p)
-/* The ID_Start of Unicode is quite limiting: it assumes a L-class
- * character (meaning that you cannot have, say, a CJK character).
- * Instead, let's allow ID_Continue but not digits. */
-#define isIDFIRST_utf8(p)      (is_utf8_idcont(p) && !is_utf8_digit(p))
+/* The ID_Start of Unicode was originally quite limiting: it assumed an
+ * L-class character (meaning that you could not have, say, a CJK
+ * character). So, instead, perl has for a long time allowed ID_Continue
+ * but not digits.
+ * We still preserve that for backward compatibility. But we also make sure
+ * that it is alphanumeric, so S_scan_word in toke.c will not hang. See
+ *    http://rt.perl.org/rt3/Ticket/Display.html?id=74022
+ * for more detail than you ever wanted to know about. */
+#define isIDFIRST_utf8(p) \
+    (is_utf8_idcont(p) && !is_utf8_digit(p) && is_utf8_alnum(p))
  #define isALPHA_utf8(p)                is_utf8_alpha(p)
  #define isSPACE_utf8(p)                is_utf8_space(p)
  #define isDIGIT_utf8(p)                is_utf8_digit(p)