perl5.git.perl.org Git - perl5.git/blame_incremental

Commit	Line	Data
	1	# regcomp.sym
	2	#
	3	# File has two sections, divided by a line of dashes '-'.
	4	#
	5	# Empty rows and #-comment rows are removed from the input and ignored
	6	#
	7	# First section is for regops, second section is for regmatch-states
	8	#
	9	# Note that the order in this file is important.
	10	#
	11	# Format for first section:
	12	# NAME \s+ TYPE, arg-description [flags] [num-args] [longjump-len] ; DESCRIPTION
	13	#
	14	#
	15	# run perl regen.pl after editing this file
	16
	17
	18
	19	#* Exit points
	20
	21	END END, no ; End of program.
	22	SUCCEED END, no ; Return from a subroutine, basically.
	23
	24	#* Anchors:
	25
	26	BOL BOL, no ; Match "" at beginning of line.
	27	MBOL BOL, no ; Same, assuming multiline.
	28	SBOL BOL, no ; Same, assuming singleline.
	29	EOS EOL, no ; Match "" at end of string.
	30	EOL EOL, no ; Match "" at end of line.
	31	MEOL EOL, no ; Same, assuming multiline.
	32	SEOL EOL, no ; Same, assuming singleline.
	33	BOUND BOUND, no ; Match "" at any word boundary using native charset semantics for non-utf8
	34	BOUNDL BOUND, no ; Match "" at any locale word boundary
	35	BOUNDU BOUND, no ; Match "" at any word boundary using Unicode semantics
	36	BOUNDA BOUND, no ; Match "" at any word boundary using ASCII semantics
	37	# All NBOUND nodes are required by a line in regexec.c to be greater than all BOUND ones
	38	NBOUND NBOUND, no ; Match "" at any word non-boundary using native charset semantics for non-utf8
	39	NBOUNDL NBOUND, no ; Match "" at any locale word non-boundary
	40	NBOUNDU NBOUND, no ; Match "" at any word non-boundary using Unicode semantics
	41	NBOUNDA NBOUND, no ; Match "" at any word non-boundary using ASCII semantics
	42	GPOS GPOS, no ; Matches where last m//g left off.
	43
	44	#* [Special] alternatives:
	45
	46	REG_ANY REG_ANY, no 0 S ; Match any one character (except newline).
	47	SANY REG_ANY, no 0 S ; Match any one character.
	48	CANY REG_ANY, no 0 S ; Match any one byte.
	49	ANYOF ANYOF, sv 0 S ; Match character in (or not in) this class, single char match only
	50	ANYOFV ANYOF, sv 0 V ; Match character in (or not in) this class, can match-multiple chars
	51	ALNUM ALNUM, no 0 S ; Match any alphanumeric character using native charset semantics for non-utf8
	52	ALNUML ALNUM, no 0 S ; Match any alphanumeric char in locale
	53	ALNUMU ALNUM, no 0 S ; Match any alphanumeric char using Unicode semantics
	54	ALNUMA ALNUM, no 0 S ; Match [A-Za-z_0-9]
	55	NALNUM NALNUM, no 0 S ; Match any non-alphanumeric character using native charset semantics for non-utf8
	56	NALNUML NALNUM, no 0 S ; Match any non-alphanumeric char in locale
	57	NALNUMU NALNUM, no 0 S ; Match any non-alphanumeric char using Unicode semantics
	58	NALNUMA NALNUM, no 0 S ; Match [^A-Za-z_0-9]
	59	SPACE SPACE, no 0 S ; Match any whitespace character using native charset semantics for non-utf8
	60	SPACEL SPACE, no 0 S ; Match any whitespace char in locale
	61	SPACEU SPACE, no 0 S ; Match any whitespace char using Unicode semantics
	62	SPACEA SPACE, no 0 S ; Match [ \t\n\f\r]
	63	NSPACE NSPACE, no 0 S ; Match any non-whitespace character using native charset semantics for non-utf8
	64	NSPACEL NSPACE, no 0 S ; Match any non-whitespace char in locale
	65	NSPACEU NSPACE, no 0 S ; Match any non-whitespace char using Unicode semantics
	66	NSPACEA NSPACE, no 0 S ; Match [^ \t\n\f\r]
	67	DIGIT DIGIT, no 0 S ; Match any numeric character using native charset semantics for non-utf8
	68	DIGITL DIGIT, no 0 S ; Match any numeric character in locale
	69	DIGITA DIGIT, no 0 S ; Match [0-9]
	70	NDIGIT NDIGIT, no 0 S ; Match any non-numeric character using native charset semantics for non-utf8
	71	NDIGITL NDIGIT, no 0 S ; Match any non-numeric character in locale
	72	NDIGITA NDIGIT, no 0 S ; Match [^0-9]
	73	CLUMP CLUMP, no 0 V ; Match any extended grapheme cluster sequence
	74
	75	#* Alternation
	76
	77	# BRANCH The set of branches constituting a single choice are hooked
	78	# together with their "next" pointers, since precedence prevents
	79	# anything being concatenated to any individual branch. The
	80	# "next" pointer of the last BRANCH in a choice points to the
	81	# thing following the whole choice. This is also where the
	82	# final "next" pointer of each individual branch points; each
	83	# branch starts with the operand node of a BRANCH node.
	84	#
	85	BRANCH BRANCH, node 0 V ; Match this alternative, or the next...
	86
	87	#*Back pointer
	88
	89	# BACK Normal "next" pointers all implicitly point forward; BACK
	90	# exists to make loop structures possible.
	91	# not used
	92	BACK BACK, no 0 V ; Match "", "next" ptr points backward.
	93
	94	#*Literals
	95
	96	EXACT EXACT, str ; Match this string (preceded by length).
	97	EXACTF EXACT, str ; Match this string, folded, native charset semantics for non-utf8 (prec. by length).
	98	EXACTFL EXACT, str ; Match this string, folded in locale (w/len).
	99	EXACTFU EXACT, str ; Match this string, folded, Unicode semantics for non-utf8 (prec. by length).
	100
	101	#*Do nothing types
	102
	103	NOTHING NOTHING, no ; Match empty string.
	104	# A variant of above which delimits a group, thus stops optimizations
	105	TAIL NOTHING, no ; Match empty string. Can jump here from outside.
	106
	107	#*Loops
	108
	109	# STAR,PLUS '?', and complex '*' and '+', are implemented as circular
	110	# BRANCH structures using BACK. Simple cases (one character
	111	# per match) are implemented with STAR and PLUS for speed
	112	# and to minimize recursive plunges.
	113	#
	114	STAR STAR, node 0 V ; Match this (simple) thing 0 or more times.
	115	PLUS PLUS, node 0 V ; Match this (simple) thing 1 or more times.
	116
	117	CURLY CURLY, sv 2 V ; Match this simple thing {n,m} times.
	118	CURLYN CURLY, no 2 V ; Capture next-after-this simple thing
	119	CURLYM CURLY, no 2 V ; Capture this medium-complex thing {n,m} times.
	120	CURLYX CURLY, sv 2 V ; Match this complex thing {n,m} times.
	121
	122	# This terminator creates a loop structure for CURLYX
	123	WHILEM WHILEM, no 0 V ; Do curly processing and see if rest matches.
	124
	125	#*Buffer related
	126
	127	# OPEN,CLOSE,GROUPP ...are numbered at compile time.
	128	OPEN OPEN, num 1 ; Mark this point in input as start of #n.
	129	CLOSE CLOSE, num 1 ; Analogous to OPEN.
	130
	131	REF REF, num 1 V ; Match some already matched string
	132	REFF REF, num 1 V ; Match already matched string, folded using native charset semantics for non-utf8
	133	REFFL REF, num 1 V ; Match already matched string, folded in loc.
	134	# REFFU and NREFFU could have been implemented using the FLAGS field of the
	135	# regnode, but by having a separate node type, we can use the existing switch
	136	# statement to avoid some tests
	137	REFFU REF, num 1 V ; Match already matched string, folded using unicode semantics for non-utf8
	138
	139	#*Named references. Code in regcomp.c assumes that these all are after the numbered references
	140	NREF REF, no-sv 1 V ; Match some already matched string
	141	NREFF REF, no-sv 1 V ; Match already matched string, folded using native charset semantics for non-utf8
	142	NREFFL REF, no-sv 1 V ; Match already matched string, folded in loc.
	143	NREFFU REF, num 1 V ; Match already matched string, folded using unicode semantics for non-utf8
	144
	145	IFMATCH BRANCHJ, off 1 . 2 ; Succeeds if the following matches.
	146	UNLESSM BRANCHJ, off 1 . 2 ; Fails if the following matches.
	147	SUSPEND BRANCHJ, off 1 V 1 ; "Independent" sub-RE.
	148	IFTHEN BRANCHJ, off 1 V 1 ; Switch, should be preceded by switcher .
	149	GROUPP GROUPP, num 1 ; Whether the group matched.
	150
	151	#*Support for long RE
	152
	153	LONGJMP LONGJMP, off 1 . 1 ; Jump far away.
	154	BRANCHJ BRANCHJ, off 1 V 1 ; BRANCH with long offset.
	155
	156	#*The heavy worker
	157
	158	EVAL EVAL, evl 1 ; Execute some Perl code.
	159
	160	#*Modifiers
	161
	162	MINMOD MINMOD, no ; Next operator is not greedy.
	163	LOGICAL LOGICAL, no ; Next opcode should set the flag only.
	164
	165	# This is not used yet
	166	RENUM BRANCHJ, off 1 . 1 ; Group with independently numbered parens.
	167
	168	#*Trie Related
	169
	170	# Behave the same as A\|LIST\|OF\|WORDS would. The '..C' variants have
	171	# inline charclass data (ascii only), the 'C' store it in the structure.
	172	# NOTE: the relative order of the TRIE-like regops is significant
	173
	174	TRIE TRIE, trie 1 ; Match many EXACT(F[LU]?)? at once. flags==type
	175	TRIEC TRIE,trie charclass ; Same as TRIE, but with embedded charclass data
	176
	177	# For start classes, contains an added fail table.
	178	AHOCORASICK TRIE, trie 1 ; Aho Corasick stclass. flags==type
	179	AHOCORASICKC TRIE,trie charclass ; Same as AHOCORASICK, but with embedded charclass data
	180
	181	#*Regex Subroutines
	182	GOSUB GOSUB, num/ofs 2L ; recurse to paren arg1 at (signed) ofs arg2
	183	GOSTART GOSTART, no ; recurse to start of pattern
	184
	185	#*Special conditionals
	186	NGROUPP NGROUPP, no-sv 1 ; Whether the group matched.
	187	INSUBP INSUBP, num 1 ; Whether we are in a specific recurse.
	188	DEFINEP DEFINEP, none 1 ; Never execute directly.
	189
	190	#*Backtracking Verbs
	191	ENDLIKE ENDLIKE, none ; Used only for the type field of verbs
	192	OPFAIL ENDLIKE, none ; Same as (?!)
	193	ACCEPT ENDLIKE, parno 1 ; Accepts the current matched string.
	194
	195
	196	#*Verbs With Arguments
	197	VERB VERB, no-sv 1 ; Used only for the type field of verbs
	198	PRUNE VERB, no-sv 1 ; Pattern fails at this startpoint if no-backtracking through this
	199	MARKPOINT VERB, no-sv 1 ; Push the current location for rollback by cut.
	200	SKIP VERB, no-sv 1 ; On failure skip forward (to the mark) before retrying
	201	COMMIT VERB, no-sv 1 ; Pattern fails outright if backtracking through this
	202	CUTGROUP VERB, no-sv 1 ; On failure go to the next alternation in the group
	203
	204	#*Control what to keep in $&.
	205	KEEPS KEEPS, no ; $& begins here.
	206
	207	#*New charclass like patterns
	208	LNBREAK LNBREAK, none ; generic newline pattern
	209	VERTWS VERTWS, none 0 S ; vertical whitespace (Perl 6)
	210	NVERTWS NVERTWS, none 0 S ; not vertical whitespace (Perl 6)
	211	HORIZWS HORIZWS, none 0 S ; horizontal whitespace (Perl 6)
	212	NHORIZWS NHORIZWS, none 0 S ; not horizontal whitespace (Perl 6)
	213
	214	FOLDCHAR FOLDCHAR, codepoint 1 ; codepoint with tricky case folding properties.
	215
	216
	217	# NEW STUFF SOMEWHERE ABOVE THIS LINE
	218
	219	################################################################################
	220
	221	#*SPECIAL REGOPS
	222
	223	# This is not really a node, but an optimized away piece of a "long" node.
	224	# To simplify debugging output, we mark it as if it were a node
	225	OPTIMIZED NOTHING, off ; Placeholder for dump.
	226
	227	# Special opcode with the property that no opcode in a compiled program
	228	# will ever be of this type. Thus it can be used as a flag value that
	229	# no other opcode has been seen. END is used similarly, in that an END
	230	# node can't be optimized. So END implies "unoptimizable" and PSEUDO means
	231	# "not seen anything to optimize yet".
	232	PSEUDO PSEUDO, off ; Pseudo opcode for internal use.
	233
	234	-------------------------------------------------------------------------------
	235	# Format for second section:
	236	# REGOP \t typelist [ \t typelist] [# Comment]
	237	# typelist= namelist
	238	# = namelist:FAIL
	239	# = name:count
	240
	241	# Anything below is a state
	242	#
	243	#
	244	TRIE next:FAIL
	245	EVAL AB:FAIL
	246	CURLYX end:FAIL
	247	WHILEM A_pre,A_min,A_max,B_min,B_max:FAIL
	248	BRANCH next:FAIL
	249	CURLYM A,B:FAIL
	250	IFMATCH A:FAIL
	251	CURLY B_min_known,B_min,B_max:FAIL
	252	COMMIT next:FAIL
	253	MARKPOINT next:FAIL
	254	SKIP next:FAIL
	255	CUTGROUP next:FAIL
	256	KEEPS next:FAIL

1

# regcomp.sym

2

#

3

# File has two sections, divided by a line of dashes '-'.

4

#

5

# Empty rows and #-comment rows are removed from the input and ignored

6

#

7

# First section is for regops, second section is for regmatch-states

8

#

9

# Note that the order in this file is important.

10

#

11

# Format for first section:

12

# NAME \s+ TYPE, arg-description [flags] [num-args] [longjump-len] ; DESCRIPTION

13

#

14

#

15

# run perl regen.pl after editing this file

#* Exit points

END END, no ; End of program.

22

SUCCEED END, no ; Return from a subroutine, basically.

#* Anchors:

BOL BOL, no ; Match "" at beginning of line.

27

MBOL BOL, no ; Same, assuming multiline.

28

SBOL BOL, no ; Same, assuming singleline.

29

EOS EOL, no ; Match "" at end of string.

30

EOL EOL, no ; Match "" at end of line.

31

MEOL EOL, no ; Same, assuming multiline.

32

SEOL EOL, no ; Same, assuming singleline.

33

BOUND BOUND, no ; Match "" at any word boundary using native charset semantics for non-utf8

34

BOUNDL BOUND, no ; Match "" at any locale word boundary

35

BOUNDU BOUND, no ; Match "" at any word boundary using Unicode semantics

36

BOUNDA BOUND, no ; Match "" at any word boundary using ASCII semantics

37

# All NBOUND nodes are required by a line in regexec.c to be greater than all BOUND ones

38

NBOUND NBOUND, no ; Match "" at any word non-boundary using native charset semantics for non-utf8

39

NBOUNDL NBOUND, no ; Match "" at any locale word non-boundary

40

NBOUNDU NBOUND, no ; Match "" at any word non-boundary using Unicode semantics

41

NBOUNDA NBOUND, no ; Match "" at any word non-boundary using ASCII semantics

42

GPOS GPOS, no ; Matches where last m//g left off.

43

44

#* [Special] alternatives:

45

46

REG_ANY REG_ANY, no 0 S ; Match any one character (except newline).

47

SANY REG_ANY, no 0 S ; Match any one character.

48

CANY REG_ANY, no 0 S ; Match any one byte.

49

ANYOF ANYOF, sv 0 S ; Match character in (or not in) this class, single char match only

50

ANYOFV ANYOF, sv 0 V ; Match character in (or not in) this class, can match-multiple chars

51

ALNUM ALNUM, no 0 S ; Match any alphanumeric character using native charset semantics for non-utf8

52

ALNUML ALNUM, no 0 S ; Match any alphanumeric char in locale

53

ALNUMU ALNUM, no 0 S ; Match any alphanumeric char using Unicode semantics

54

ALNUMA ALNUM, no 0 S ; Match [A-Za-z_0-9]

55

NALNUM NALNUM, no 0 S ; Match any non-alphanumeric character using native charset semantics for non-utf8

56

NALNUML NALNUM, no 0 S ; Match any non-alphanumeric char in locale

57

NALNUMU NALNUM, no 0 S ; Match any non-alphanumeric char using Unicode semantics

58

NALNUMA NALNUM, no 0 S ; Match [^A-Za-z_0-9]

59

SPACE SPACE, no 0 S ; Match any whitespace character using native charset semantics for non-utf8

60

SPACEL SPACE, no 0 S ; Match any whitespace char in locale

61

SPACEU SPACE, no 0 S ; Match any whitespace char using Unicode semantics

62

SPACEA SPACE, no 0 S ; Match [ \t\n\f\r]

63

NSPACE NSPACE, no 0 S ; Match any non-whitespace character using native charset semantics for non-utf8

64

NSPACEL NSPACE, no 0 S ; Match any non-whitespace char in locale

65

NSPACEU NSPACE, no 0 S ; Match any non-whitespace char using Unicode semantics

66

NSPACEA NSPACE, no 0 S ; Match [^ \t\n\f\r]

67

DIGIT DIGIT, no 0 S ; Match any numeric character using native charset semantics for non-utf8

68

DIGITL DIGIT, no 0 S ; Match any numeric character in locale

69

DIGITA DIGIT, no 0 S ; Match [0-9]

70

NDIGIT NDIGIT, no 0 S ; Match any non-numeric character using native charset semantics for non-utf8

71

NDIGITL NDIGIT, no 0 S ; Match any non-numeric character in locale

72

NDIGITA NDIGIT, no 0 S ; Match [^0-9]

73

CLUMP CLUMP, no 0 V ; Match any extended grapheme cluster sequence

#* Alternation

# BRANCH The set of branches constituting a single choice are hooked

78

# together with their "next" pointers, since precedence prevents

79

# anything being concatenated to any individual branch. The

80

# "next" pointer of the last BRANCH in a choice points to the

81

# thing following the whole choice. This is also where the

82

# final "next" pointer of each individual branch points; each

83

# branch starts with the operand node of a BRANCH node.

84

#

85

BRANCH BRANCH, node 0 V ; Match this alternative, or the next...

#*Back pointer

# BACK Normal "next" pointers all implicitly point forward; BACK

90

# exists to make loop structures possible.

91

# not used

92

BACK BACK, no 0 V ; Match "", "next" ptr points backward.

#*Literals

EXACT EXACT, str ; Match this string (preceded by length).

97

EXACTF EXACT, str ; Match this string, folded, native charset semantics for non-utf8 (prec. by length).

98

EXACTFL EXACT, str ; Match this string, folded in locale (w/len).

99

EXACTFU EXACT, str ; Match this string, folded, Unicode semantics for non-utf8 (prec. by length).

#*Do nothing types

NOTHING NOTHING, no ; Match empty string.

104

# A variant of above which delimits a group, thus stops optimizations

105

TAIL NOTHING, no ; Match empty string. Can jump here from outside.

#*Loops

# STAR,PLUS '?', and complex '*' and '+', are implemented as circular

110

# BRANCH structures using BACK. Simple cases (one character

111

# per match) are implemented with STAR and PLUS for speed

112

# and to minimize recursive plunges.

113

#

114

STAR STAR, node 0 V ; Match this (simple) thing 0 or more times.

115

PLUS PLUS, node 0 V ; Match this (simple) thing 1 or more times.

116

117

CURLY CURLY, sv 2 V ; Match this simple thing {n,m} times.

118

CURLYN CURLY, no 2 V ; Capture next-after-this simple thing

119

CURLYM CURLY, no 2 V ; Capture this medium-complex thing {n,m} times.

120

CURLYX CURLY, sv 2 V ; Match this complex thing {n,m} times.

121

122

# This terminator creates a loop structure for CURLYX

123

WHILEM WHILEM, no 0 V ; Do curly processing and see if rest matches.

#*Buffer related

# OPEN,CLOSE,GROUPP ...are numbered at compile time.

128

OPEN OPEN, num 1 ; Mark this point in input as start of #n.

129

CLOSE CLOSE, num 1 ; Analogous to OPEN.

130

131

REF REF, num 1 V ; Match some already matched string

132

REFF REF, num 1 V ; Match already matched string, folded using native charset semantics for non-utf8

133

REFFL REF, num 1 V ; Match already matched string, folded in loc.

134

# REFFU and NREFFU could have been implemented using the FLAGS field of the

135

# regnode, but by having a separate node type, we can use the existing switch

136

# statement to avoid some tests

137

REFFU REF, num 1 V ; Match already matched string, folded using unicode semantics for non-utf8

138

139

#*Named references. Code in regcomp.c assumes that these all are after the numbered references

140

NREF REF, no-sv 1 V ; Match some already matched string

141

NREFF REF, no-sv 1 V ; Match already matched string, folded using native charset semantics for non-utf8

142

NREFFL REF, no-sv 1 V ; Match already matched string, folded in loc.

143

NREFFU REF, num 1 V ; Match already matched string, folded using unicode semantics for non-utf8

144

145

IFMATCH BRANCHJ, off 1 . 2 ; Succeeds if the following matches.

146

UNLESSM BRANCHJ, off 1 . 2 ; Fails if the following matches.

147

SUSPEND BRANCHJ, off 1 V 1 ; "Independent" sub-RE.

148

IFTHEN BRANCHJ, off 1 V 1 ; Switch, should be preceded by switcher .

149

GROUPP GROUPP, num 1 ; Whether the group matched.

150

151

#*Support for long RE

152

153

LONGJMP LONGJMP, off 1 . 1 ; Jump far away.

154

BRANCHJ BRANCHJ, off 1 V 1 ; BRANCH with long offset.

#*The heavy worker

EVAL EVAL, evl 1 ; Execute some Perl code.

#*Modifiers

MINMOD MINMOD, no ; Next operator is not greedy.

163

LOGICAL LOGICAL, no ; Next opcode should set the flag only.

164

165

# This is not used yet

166

RENUM BRANCHJ, off 1 . 1 ; Group with independently numbered parens.

#*Trie Related

# Behave the same as A|LIST|OF|WORDS would. The '..C' variants have

171

# inline charclass data (ascii only), the 'C' store it in the structure.

172

# NOTE: the relative order of the TRIE-like regops is significant

173

174

TRIE TRIE, trie 1 ; Match many EXACT(F[LU]?)? at once. flags==type

175

TRIEC TRIE,trie charclass ; Same as TRIE, but with embedded charclass data

176

177

# For start classes, contains an added fail table.

178

AHOCORASICK TRIE, trie 1 ; Aho Corasick stclass. flags==type

179

AHOCORASICKC TRIE,trie charclass ; Same as AHOCORASICK, but with embedded charclass data

180

181

#*Regex Subroutines

182

GOSUB GOSUB, num/ofs 2L ; recurse to paren arg1 at (signed) ofs arg2

183

GOSTART GOSTART, no ; recurse to start of pattern

184

185

#*Special conditionals

186

NGROUPP NGROUPP, no-sv 1 ; Whether the group matched.

187

INSUBP INSUBP, num 1 ; Whether we are in a specific recurse.

188

DEFINEP DEFINEP, none 1 ; Never execute directly.

189

190

#*Backtracking Verbs

191

ENDLIKE ENDLIKE, none ; Used only for the type field of verbs

192

OPFAIL ENDLIKE, none ; Same as (?!)

193

ACCEPT ENDLIKE, parno 1 ; Accepts the current matched string.

194

195

196

#*Verbs With Arguments

197

VERB VERB, no-sv 1 ; Used only for the type field of verbs

198

PRUNE VERB, no-sv 1 ; Pattern fails at this startpoint if no-backtracking through this

199

MARKPOINT VERB, no-sv 1 ; Push the current location for rollback by cut.

200

SKIP VERB, no-sv 1 ; On failure skip forward (to the mark) before retrying

201

COMMIT VERB, no-sv 1 ; Pattern fails outright if backtracking through this

202

CUTGROUP VERB, no-sv 1 ; On failure go to the next alternation in the group

203

204

#*Control what to keep in $&.

205

KEEPS KEEPS, no ; $& begins here.

206

207

#*New charclass like patterns

208

LNBREAK LNBREAK, none ; generic newline pattern

209

VERTWS VERTWS, none 0 S ; vertical whitespace (Perl 6)

210

NVERTWS NVERTWS, none 0 S ; not vertical whitespace (Perl 6)

211

HORIZWS HORIZWS, none 0 S ; horizontal whitespace (Perl 6)

212

NHORIZWS NHORIZWS, none 0 S ; not horizontal whitespace (Perl 6)

213

214

FOLDCHAR FOLDCHAR, codepoint 1 ; codepoint with tricky case folding properties.

215

216

217

# NEW STUFF SOMEWHERE ABOVE THIS LINE

218

219

################################################################################

#*SPECIAL REGOPS

# This is not really a node, but an optimized away piece of a "long" node.

224

# To simplify debugging output, we mark it as if it were a node

225

OPTIMIZED NOTHING, off ; Placeholder for dump.

226

227

# Special opcode with the property that no opcode in a compiled program

228

# will ever be of this type. Thus it can be used as a flag value that

229

# no other opcode has been seen. END is used similarly, in that an END

230

# node can't be optimized. So END implies "unoptimizable" and PSEUDO means

231

# "not seen anything to optimize yet".

232

PSEUDO PSEUDO, off ; Pseudo opcode for internal use.

233

234

-------------------------------------------------------------------------------

235

# Format for second section:

236

# REGOP \t typelist [ \t typelist] [# Comment]

# typelist= namelist

# = namelist:FAIL

# = name:count

# Anything below is a state

#

#

TRIE next:FAIL

EVAL AB:FAIL

CURLYX end:FAIL

WHILEM A_pre,A_min,A_max,B_min,B_max:FAIL

BRANCH next:FAIL

CURLYM A,B:FAIL

IFMATCH A:FAIL

CURLY B_min_known,B_min,B_max:FAIL

COMMIT next:FAIL

MARKPOINT next:FAIL

SKIP next:FAIL

CUTGROUP next:FAIL

KEEPS next:FAIL