From: David Mitchell Date: Fri, 25 Sep 2015 15:02:53 +0000 (+0100) Subject: fbm_instr(): tweak docs and formatting X-Git-Tag: v5.23.4~95 X-Git-Url: https://perl5.git.perl.org/perl5.git/commitdiff_plain/41c8d07a26fff22a68ce29ba9cedf18578b32343?hp=8c9009ad13ddadd4e5865ef02a27df15933a7ca7;ds=sidebyside fbm_instr(): tweak docs and formatting Expand the commentary at the start of this function; add more blank lines to separate chunks of code, and document what SVpbm_TAIL is for. --- diff --git a/sv.h b/sv.h index 57116d4..e1797de 100644 --- a/sv.h +++ b/sv.h @@ -467,7 +467,7 @@ perform the upgrade if necessary. See C<L</svtype>>. /* PVHV */ #define SVphv_HASKFLAGS 0x80000000 /* keys have flag byte after hash */ /* PVGV when SVpbm_VALID is true */ -#define SVpbm_TAIL 0x80000000 +#define SVpbm_TAIL 0x80000000 /* string has a fake "\n" appended */ /* RV upwards. However, SVf_ROK and SVp_IOK are exclusive */ #define SVprv_WEAKREF 0x80000000 /* Weak reference */ /* pad name vars only */ diff --git a/util.c b/util.c index 616356e..315a04c 100644 --- a/util.c +++ b/util.c @@ -727,21 +727,37 @@ Perl_fbm_compile(pTHX_ SV *sv, U32 flags) s[rarest], (UV)rarest)); } -/* If SvTAIL(littlestr), it has a fake '\n' at end. */ -/* If SvTAIL is actually due to \Z or \z, this gives false positives - if multiline */ /* =for apidoc fbm_instr Returns the location of the SV in the string delimited by C<big> and -C<bigend>. It returns C<NULL> if the string can't be found. The C<sv> +C<bigend> (C<bigend> is the char following the last char). +It returns C<NULL> if the string can't be found. The C<sv> does not have to be C<fbm_compiled>, but the search will not be as fast then. =cut + +If SvTAIL(littlestr) is true, a fake "\n" was appended to the string +during FBM compilation due to FBMcf_TAIL in flags. It indicates that +the littlestr must be anchored to the end of bigstr (or to any \n if +FBMrf_MULTILINE). + +E.g. The regex compiler would compile /abc/ to a littlestr of "abc", +while /abc$/ compiles to "abc\n" with SvTAIL() true. 
+ +A littlestr of "abc", !SvTAIL matches as /abc/; +a littlestr of "ab\n", SvTAIL matches as: + without FBMrf_MULTILINE: /ab\n?\z/ + with FBMrf_MULTILINE: /ab\n/ || /ab\z/; + +(According to Ilya from 1999; I don't know if this is still true, DAPM 2015): + "If SvTAIL is actually due to \Z or \z, this gives false positives + if multiline". */ + char * Perl_fbm_instr(pTHX_ unsigned char *big, unsigned char *bigend, SV *littlestr, U32 flags) { @@ -766,6 +782,7 @@ Perl_fbm_instr(pTHX_ unsigned char *big, unsigned char *bigend, SV *littlestr, U switch (littlelen) { /* Special cases for 0, 1 and 2 */ case 0: return (char*)big; /* Cannot be SvTAIL! */ + case 1: if (SvTAIL(littlestr) && !multiline) { /* Anchor only! */ /* Know that bigend != big. */ @@ -782,6 +799,7 @@ Perl_fbm_instr(pTHX_ unsigned char *big, unsigned char *bigend, SV *littlestr, U if (SvTAIL(littlestr)) return (char *) bigend; return NULL; + case 2: if (SvTAIL(littlestr) && !multiline) { if (bigend[-1] == '\n' && bigend[-2] == *little) @@ -842,6 +860,7 @@ Perl_fbm_instr(pTHX_ unsigned char *big, unsigned char *bigend, SV *littlestr, U if (SvTAIL(littlestr) && (*bigend == *little)) return (char *)bigend; /* bigend is already decremented. */ return NULL; + default: break; /* Only lengths 0 1 and 2 have special-case code. */ } @@ -861,6 +880,8 @@ Perl_fbm_instr(pTHX_ unsigned char *big, unsigned char *bigend, SV *littlestr, U } return NULL; } + + /* not compiled; use Perl_ninstr() instead */ if (!SvVALID(littlestr)) { char * const b = ninstr((char*)big,(char*)bigend, (char*)little, (char*)little + littlelen); @@ -930,6 +951,7 @@ Perl_fbm_instr(pTHX_ unsigned char *big, unsigned char *bigend, SV *littlestr, U } } + /* =for apidoc foldEQ