From c440a570f986f52b764752007c070e8549b2bf7e Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 17 Mar 2015 16:56:34 -0600 Subject: [PATCH] regcomp.sym: Update \b descriptions --- pod/perldebguts.pod | 28 +++++++++++++--------------- regcomp.sym | 14 +++++++------- regnodes.h | 14 +++++++------- 3 files changed, 27 insertions(+), 29 deletions(-) diff --git a/pod/perldebguts.pod b/pod/perldebguts.pod index 591e69b..2b5561d 100644 --- a/pod/perldebguts.pod +++ b/pod/perldebguts.pod @@ -572,24 +572,22 @@ will be lost. GPOS no Matches where last m//g left off. # Word Boundary Opcodes: - BOUND no Match "" at any word boundary using native - charset rules for non-utf8, otherwise - Unicode rules - BOUNDL no Match "" at any boundary of a given type - using locale rules + BOUND no Like BOUNDA for non-utf8, otherwise match "" + between any Unicode \w\W or \W\w + BOUNDL no Like BOUND/BOUNDU, but \w and \W are defined + by current locale BOUNDU no Match "" at any boundary of a given type using Unicode rules - BOUNDA no Match "" at any boundary of a given type - using ASCII rules - NBOUND no Match "" at any word non-boundary using - native charset rules for non-utf8, otherwise - Unicode rules - NBOUNDL no Match "" at any boundary of a given type - using locale rules - NBOUNDU no Match "" at any boundary of a given type + BOUNDA no Match "" at any boundary between \w\W or + \W\w, where \w is [_a-zA-Z0-9] + NBOUND no Like NBOUNDA for non-utf8, otherwise match + "" between any Unicode \w\w or \W\W + NBOUNDL no Like NBOUND/NBOUNDU, but \w and \W are + defined by current locale + NBOUNDU no Match "" at any non-boundary of a given type using using Unicode rules - NBOUNDA no Match "" at any boundary of a given type - using using ASCII rules + NBOUNDA no Match "" between any \w\w or \W\W, where \w + is [_a-zA-Z0-9] # [Special] alternatives: REG_ANY no Match any one character (except newline). 
diff --git a/regcomp.sym b/regcomp.sym index 7daa241..f79b874 100644 --- a/regcomp.sym +++ b/regcomp.sym @@ -43,15 +43,15 @@ GPOS GPOS, no ; Matches where last m//g left off. # in regcomp.c uses the enum value of the modifier as an offset from the /d # version. The complements must come after the non-complements. # BOUND, POSIX and their complements are affected, as well as EXACTF. -BOUND BOUND, no ; Match "" at any word boundary using native charset rules for non-utf8, otherwise Unicode rules -BOUNDL BOUND, no ; Match "" at any boundary of a given type using locale rules +BOUND BOUND, no ; Like BOUNDA for non-utf8, otherwise match "" between any Unicode \w\W or \W\w +BOUNDL BOUND, no ; Like BOUND/BOUNDU, but \w and \W are defined by current locale BOUNDU BOUND, no ; Match "" at any boundary of a given type using Unicode rules -BOUNDA BOUND, no ; Match "" at any boundary of a given type using ASCII rules +BOUNDA BOUND, no ; Match "" at any boundary between \w\W or \W\w, where \w is [_a-zA-Z0-9] # All NBOUND nodes are required by code in regexec.c to be greater than all BOUND ones -NBOUND NBOUND, no ; Match "" at any word non-boundary using native charset rules for non-utf8, otherwise Unicode rules -NBOUNDL NBOUND, no ; Match "" at any boundary of a given type using locale rules -NBOUNDU NBOUND, no ; Match "" at any boundary of a given type using using Unicode rules -NBOUNDA NBOUND, no ; Match "" at any boundary of a given type using using ASCII rules +NBOUND NBOUND, no ; Like NBOUNDA for non-utf8, otherwise match "" between any Unicode \w\w or \W\W +NBOUNDL NBOUND, no ; Like NBOUND/NBOUNDU, but \w and \W are defined by current locale +NBOUNDU NBOUND, no ; Match "" at any non-boundary of a given type using using Unicode rules +NBOUNDA NBOUND, no ; Match "" between any \w\w or \W\W, where \w is [_a-zA-Z0-9] #* [Special] alternatives: REG_ANY REG_ANY, no 0 S ; Match any one character (except newline). 
diff --git a/regnodes.h b/regnodes.h index 144d6f6..3c9b991 100644 --- a/regnodes.h +++ b/regnodes.h @@ -19,14 +19,14 @@ #define MEOL 5 /* 0x05 Same, assuming multiline: /$/m */ #define EOS 6 /* 0x06 Match "" at end of string: /\z/ */ #define GPOS 7 /* 0x07 Matches where last m//g left off. */ -#define BOUND 8 /* 0x08 Match "" at any word boundary using native charset rules for non-utf8, otherwise Unicode rules */ -#define BOUNDL 9 /* 0x09 Match "" at any boundary of a given type using locale rules */ +#define BOUND 8 /* 0x08 Like BOUNDA for non-utf8, otherwise match "" between any Unicode \w\W or \W\w */ +#define BOUNDL 9 /* 0x09 Like BOUND/BOUNDU, but \w and \W are defined by current locale */ #define BOUNDU 10 /* 0x0a Match "" at any boundary of a given type using Unicode rules */ -#define BOUNDA 11 /* 0x0b Match "" at any boundary of a given type using ASCII rules */ -#define NBOUND 12 /* 0x0c Match "" at any word non-boundary using native charset rules for non-utf8, otherwise Unicode rules */ -#define NBOUNDL 13 /* 0x0d Match "" at any boundary of a given type using locale rules */ -#define NBOUNDU 14 /* 0x0e Match "" at any boundary of a given type using using Unicode rules */ -#define NBOUNDA 15 /* 0x0f Match "" at any boundary of a given type using using ASCII rules */ +#define BOUNDA 11 /* 0x0b Match "" at any boundary between \w\W or \W\w, where \w is [_a-zA-Z0-9] */ +#define NBOUND 12 /* 0x0c Like NBOUNDA for non-utf8, otherwise match "" between any Unicode \w\w or \W\W */ +#define NBOUNDL 13 /* 0x0d Like NBOUND/NBOUNDU, but \w and \W are defined by current locale */ +#define NBOUNDU 14 /* 0x0e Match "" at any non-boundary of a given type using using Unicode rules */ +#define NBOUNDA 15 /* 0x0f Match "" between any \w\w or \W\W, where \w is [_a-zA-Z0-9] */ #define REG_ANY 16 /* 0x10 Match any one character (except newline). */ #define SANY 17 /* 0x11 Match any one character. */ #define CANY 18 /* 0x12 Match any one byte. */ -- 1.8.3.1