X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/34fdef848b1687b91892ba55e9e0c3430e0770f6..b4941db24e5b5bf41a100f8944f5845de2bdf8ab:/regcomp.sym?ds=sidebyside diff --git a/regcomp.sym b/regcomp.sym index 4764d0e..6908712 100644 --- a/regcomp.sym +++ b/regcomp.sym @@ -24,39 +24,42 @@ END END, no ; End of program. SUCCEED END, no ; Return from a subroutine, basically. -#* Anchors: - -BOL BOL, no ; Match "" at beginning of line. -MBOL BOL, no ; Same, assuming multiline. -SBOL BOL, no ; Same, assuming singleline. -EOS EOL, no ; Match "" at end of string. -EOL EOL, no ; Match "" at end of line. -MEOL EOL, no ; Same, assuming multiline. -SEOL EOL, no ; Same, assuming singleline. +#* Line Start Anchors: +#Note flags field for SBOL indicates if it is a /^/ or a /\A/ +SBOL BOL, no ; Match "" at beginning of line: /^/, /\A/ +MBOL BOL, no ; Same, assuming multiline: /^/m + +#* Line End Anchors: +SEOL EOL, no ; Match "" at end of line: /$/ +MEOL EOL, no ; Same, assuming multiline: /$/m +EOS EOL, no ; Match "" at end of string: /\z/ + +#* Match Start Anchors: +GPOS GPOS, no ; Matches where last m//g left off. + +#* Word Boundary Opcodes: # The regops that have varieties that vary depending on the character set regex # modifiers have to ordered thusly: /d, /l, /u, /a, /aa. This is because code # in regcomp.c uses the enum value of the modifier as an offset from the /d # version. The complements must come after the non-complements. # BOUND, POSIX and their complements are affected, as well as EXACTF. -BOUND BOUND, no ; Match "" at any word boundary using native charset semantics for non-utf8 +BOUND BOUND, no ; Match "" at any word boundary using native charset rules for non-utf8 BOUNDL BOUND, no ; Match "" at any locale word boundary -BOUNDU BOUND, no ; Match "" at any word boundary using Unicode semantics -BOUNDA BOUND, no ; Match "" at any word boundary using ASCII semantics +BOUNDU BOUND, no ; Match "" at any word boundary using Unicode rules +BOUNDA BOUND, no ; Match "" at any word boundary using ASCII rules # All NBOUND nodes are required by code in regexec.c to be greater than all BOUND ones -NBOUND NBOUND, no ; Match "" at any word non-boundary using native charset semantics for non-utf8 +NBOUND NBOUND, no ; Match "" at any word non-boundary using native charset rules for non-utf8 NBOUNDL NBOUND, no ; Match "" at any locale word non-boundary -NBOUNDU NBOUND, no ; Match "" at any word non-boundary using Unicode semantics -NBOUNDA NBOUND, no ; Match "" at any word non-boundary using ASCII semantics -GPOS GPOS, no ; Matches where last m//g left off. +NBOUNDU NBOUND, no ; Match "" at any word non-boundary using Unicode rules +NBOUNDA NBOUND, no ; Match "" at any word non-boundary using ASCII rules #* [Special] alternatives: - REG_ANY REG_ANY, no 0 S ; Match any one character (except newline). SANY REG_ANY, no 0 S ; Match any one character. CANY REG_ANY, no 0 S ; Match any one byte. ANYOF ANYOF, sv 0 S ; Match character in (or not in) this class, single char match only -ANYOF_NON_UTF8_NON_ASCII_ALL ANYOF, sv 0 S ; like ANYOF, also matches any U+80 - U+FF when not in UTF-8 +#* POSIX Character Classes: # Order of the below is important. See ordering comment above. POSIXD POSIXD, none 0 S ; Some [[:class:]] under /d; the FLAGS field gives which one POSIXL POSIXD, none 0 S ; Some [[:class:]] under /l; the FLAGS field gives which one @@ -132,32 +135,33 @@ OPEN OPEN, num 1 ; Mark this point in input as start of #n. CLOSE CLOSE, num 1 ; Analogous to OPEN. REF REF, num 1 V ; Match some already matched string -REFF REF, num 1 V ; Match already matched string, folded using native charset semantics for non-utf8 +REFF REF, num 1 V ; Match already matched string, folded using native charset rules for non-utf8 REFFL REF, num 1 V ; Match already matched string, folded in loc. # N?REFF[AU] could have been implemented using the FLAGS field of the # regnode, but by having a separate node type, we can use the existing switch # statement to avoid some tests -REFFU REF, num 1 V ; Match already matched string, folded using unicode semantics for non-utf8 -REFFA REF, num 1 V ; Match already matched string, folded using unicode semantics for non-utf8, no mixing ASCII, non-ASCII +REFFU REF, num 1 V ; Match already matched string, folded using unicode rules for non-utf8 +REFFA REF, num 1 V ; Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII #*Named references. Code in regcomp.c assumes that these all are after #*the numbered references NREF REF, no-sv 1 V ; Match some already matched string -NREFF REF, no-sv 1 V ; Match already matched string, folded using native charset semantics for non-utf8 +NREFF REF, no-sv 1 V ; Match already matched string, folded using native charset rules for non-utf8 NREFFL REF, no-sv 1 V ; Match already matched string, folded in loc. -NREFFU REF, num 1 V ; Match already matched string, folded using unicode semantics for non-utf8 -NREFFA REF, num 1 V ; Match already matched string, folded using unicode semantics for non-utf8, no mixing ASCII, non-ASCII +NREFFU REF, num 1 V ; Match already matched string, folded using unicode rules for non-utf8 +NREFFA REF, num 1 V ; Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII + +#*Support for long RE +LONGJMP LONGJMP, off 1 . 1 ; Jump far away. +BRANCHJ BRANCHJ, off 1 V 1 ; BRANCH with long offset. +#*Special Case Regops IFMATCH BRANCHJ, off 1 . 2 ; Succeeds if the following matches. UNLESSM BRANCHJ, off 1 . 2 ; Fails if the following matches. SUSPEND BRANCHJ, off 1 V 1 ; "Independent" sub-RE. IFTHEN BRANCHJ, off 1 V 1 ; Switch, should be preceded by switcher. GROUPP GROUPP, num 1 ; Whether the group matched. -#*Support for long RE - -LONGJMP LONGJMP, off 1 . 1 ; Jump far away. -BRANCHJ BRANCHJ, off 1 V 1 ; BRANCH with long offset. #*The heavy worker