summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1:
e9321f5)
In the "just matched float substr, now match fixed substr" branch,
initially add an extra prog->anchored_offset to the last and last2 vars;
since a lot of the later calculations involve adding anchored_offset,
doing this early to the last* vars means less work in some cases. In
particular, last is calculated from s by a single
HOP4(s, prog->anchored_offset-start_shift,...)
rather than two separate
HOP3(s, -start_shift,...);
HOP3(..., prog->anchored_offset,...);
which may mostly cancel each other out.
Similarly with last2. Later, we can skip adding prog->anchored_offset to
last1, since its antecedents already have the bias added.
In the case of failure, calculating a new start position involves an extra
HOP to s, but removes a HOP from other_last, so the two cancel out.
To make this work, I revived the reghop4() function which had been
commented out, and added a HOP4c() wrapper macro. This is like HOP3c(),
but allows you to specify both lower and upper limits. Useful when you
don't know the sign of the offset in advance.
(Yves had earlier added this function, but had commented it out until such
time as it was actually used.)
I also added some extra comments to this block and removed the comment
about it being maybe broken under utf8, since I'm auditing the code for
utf8-safeness.
ERsM |SV* |core_regclass_swash|NULLOK const regexp *prog \
|NN const struct regnode *node|bool doinit \
|NULLOK SV **listsvp
ERsM |SV* |core_regclass_swash|NULLOK const regexp *prog \
|NN const struct regnode *node|bool doinit \
|NULLOK SV **listsvp
ERsn |U8* |reghop4 |NN U8 *s|SSize_t off|NN const U8 *llim \
|NN const U8 *rlim
ERsn |U8* |reghop4 |NN U8 *s|SSize_t off|NN const U8 *llim \
|NN const U8 *rlim
ERsn |U8* |reghopmaybe3 |NN U8 *s|SSize_t off|NN const U8 *lim
ERs |char* |find_byclass |NN regexp * prog|NN const regnode *c \
|NN char *s|NN const char *strend \
ERsn |U8* |reghopmaybe3 |NN U8 *s|SSize_t off|NN const U8 *lim
ERs |char* |find_byclass |NN regexp * prog|NN const regnode *c \
|NN char *s|NN const char *strend \
#define regcppop(a,b) S_regcppop(aTHX_ a,b)
#define regcppush(a,b,c) S_regcppush(aTHX_ a,b,c)
#define reghop3 S_reghop3
#define regcppop(a,b) S_regcppop(aTHX_ a,b)
#define regcppush(a,b,c) S_regcppush(aTHX_ a,b,c)
#define reghop3 S_reghop3
+#define reghop4 S_reghop4
#define reghopmaybe3 S_reghopmaybe3
#define reginclass(a,b,c,d,e) S_reginclass(aTHX_ a,b,c,d,e)
#define regmatch(a,b,c) S_regmatch(aTHX_ a,b,c)
#define reghopmaybe3 S_reghopmaybe3
#define reginclass(a,b,c,d,e) S_reginclass(aTHX_ a,b,c,d,e)
#define regmatch(a,b,c) S_regmatch(aTHX_ a,b,c)
#define regtry(a,b) S_regtry(aTHX_ a,b)
#define to_byte_substr(a) S_to_byte_substr(aTHX_ a)
#define to_utf8_substr(a) S_to_utf8_substr(aTHX_ a)
#define regtry(a,b) S_regtry(aTHX_ a,b)
#define to_byte_substr(a) S_to_byte_substr(aTHX_ a)
#define to_utf8_substr(a) S_to_utf8_substr(aTHX_ a)
-# if defined(XXX_dmq)
-#define reghop4 S_reghop4
-# endif
# endif
# if defined(PERL_IN_UTF8_C) || defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)
#define _to_fold_latin1(a,b,c,d) Perl__to_fold_latin1(aTHX_ a,b,c,d)
# endif
# if defined(PERL_IN_UTF8_C) || defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)
#define _to_fold_latin1(a,b,c,d) Perl__to_fold_latin1(aTHX_ a,b,c,d)
#define PERL_ARGS_ASSERT_REGHOP3 \
assert(s); assert(lim)
#define PERL_ARGS_ASSERT_REGHOP3 \
assert(s); assert(lim)
+STATIC U8* S_reghop4(U8 *s, SSize_t off, const U8 *llim, const U8 *rlim)
+ __attribute__warn_unused_result__
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(3)
+ __attribute__nonnull__(4);
+#define PERL_ARGS_ASSERT_REGHOP4 \
+ assert(s); assert(llim); assert(rlim)
+
STATIC U8* S_reghopmaybe3(U8 *s, SSize_t off, const U8 *lim)
__attribute__warn_unused_result__
__attribute__nonnull__(1)
STATIC U8* S_reghopmaybe3(U8 *s, SSize_t off, const U8 *lim)
__attribute__warn_unused_result__
__attribute__nonnull__(1)
#define PERL_ARGS_ASSERT_TO_UTF8_SUBSTR \
assert(prog)
#define PERL_ARGS_ASSERT_TO_UTF8_SUBSTR \
assert(prog)
-# if defined(XXX_dmq)
-STATIC U8* S_reghop4(U8 *s, SSize_t off, const U8 *llim, const U8 *rlim)
- __attribute__warn_unused_result__
- __attribute__nonnull__(1)
- __attribute__nonnull__(3)
- __attribute__nonnull__(4);
-#define PERL_ARGS_ASSERT_REGHOP4 \
- assert(s); assert(llim); assert(rlim)
-
-# endif
#endif
#if defined(PERL_IN_SCOPE_C)
STATIC void S_save_pushptri32ptr(pTHX_ void *const ptr1, const I32 i, void *const ptr2, const int type);
#endif
#if defined(PERL_IN_SCOPE_C)
STATIC void S_save_pushptri32ptr(pTHX_ void *const ptr1, const I32 i, void *const ptr2, const int type);
? reghop3((U8*)(pos), off, (U8*)(lim)) \
: (U8*)((pos + off) > lim ? lim : (pos + off)))
? reghop3((U8*)(pos), off, (U8*)(lim)) \
: (U8*)((pos + off) > lim ? lim : (pos + off)))
+#define HOP4(pos,off,llim, rlim) (reginfo->is_utf8_target \
+ ? reghop4((U8*)(pos), off, (U8*)(llim), (U8*)(rlim)) \
+ : (U8*)(pos + off))
+#define HOP4c(pos,off,llim, rlim) ((char*)HOP4(pos,off,llim, rlim))
#define NEXTCHR_EOS -10 /* nextchr has fallen off the end */
#define NEXTCHR_IS_EOS (nextchr < 0)
#define NEXTCHR_EOS -10 /* nextchr has fallen off the end */
#define NEXTCHR_IS_EOS (nextchr < 0)
if (check == (utf8_target ? prog->float_utf8 : prog->float_substr)) {
do_other_anchored:
{
if (check == (utf8_target ? prog->float_utf8 : prog->float_substr)) {
do_other_anchored:
{
- char * const last = HOP3c(s, -start_shift, strbeg);
char *last1, *last2;
char * const saved_s = s;
SV* must;
char *last1, *last2;
char * const saved_s = s;
SV* must;
+ /* we've previously found a floating substr starting at s.
+ * This means that the regex origin must lie somewhere
+ * between min: HOP3(s, -check_offset_max)
+ * between max: HOP3(s, -check_offset_min)
+ * (except that min will be >= strpos)
+ * So the fixed substr must lie somewhere between
+ * HOP3(min, anchored_offset)
+ * HOP3(max, anchored_offset) + SvCUR(substr)
+ */
+ assert(strpos + start_shift <= s);
+ last = HOP4c(s, prog->anchored_offset-start_shift,
+ strbeg, strend);
+
t = s - prog->check_offset_max;
if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
&& (!utf8_target
t = s - prog->check_offset_max;
if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
&& (!utf8_target
t = HOP3c(t, prog->anchored_offset, strend);
if (t < other_last) /* These positions already checked */
t = other_last;
t = HOP3c(t, prog->anchored_offset, strend);
if (t < other_last) /* These positions already checked */
t = other_last;
- last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
+
+ assert(prog->minlen > prog->anchored_offset);
+ last2 = last1 = HOP3c(strend,
+ prog->anchored_offset-prog->minlen, strbeg);
if (last < last1)
last1 = last;
if (last < last1)
last1 = last;
- /* XXXX It is not documented what units *_offsets are in.
- We assume bytes, but this is clearly wrong.
- Meaning this code needs to be carefully reviewed for errors.
- dmq.
- */
/* On end-of-str: see comment below. */
must = utf8_target ? prog->anchored_utf8 : prog->anchored_substr;
/* On end-of-str: see comment below. */
must = utf8_target ? prog->anchored_utf8 : prog->anchored_substr;
else
s = fbm_instr(
(unsigned char*)t,
else
s = fbm_instr(
(unsigned char*)t,
- HOP3(HOP3(last1, prog->anchored_offset, strend)
- + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
+ HOP3(last1 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
must,
multiline ? FBMrf_MULTILINE : 0
);
must,
multiline ? FBMrf_MULTILINE : 0
);
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
", trying floating at offset %ld...\n",
(long)(HOP3c(saved_s, 1, strend) - i_strpos)));
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
", trying floating at offset %ld...\n",
(long)(HOP3c(saved_s, 1, strend) - i_strpos)));
- other_last = HOP3c(last1, prog->anchored_offset+1, strend);
- s = HOP3c(last, 1, strend);
+ other_last = HOP3c(last1, 1, strend);
+ s = HOP4c(last, 1 - prog->anchored_offset, strbeg, strend);
-#ifdef XXX_dmq
-/* there are a bunch of places where we use two reghop3's that should
- be replaced with this routine. but since thats not done yet
- we ifdef it out - dmq
-*/
STATIC U8 *
S_reghop4(U8 *s, SSize_t off, const U8* llim, const U8* rlim)
{
STATIC U8 *
S_reghop4(U8 *s, SSize_t off, const U8* llim, const U8* rlim)
{
STATIC U8 *
S_reghopmaybe3(U8* s, SSize_t off, const U8* lim)
STATIC U8 *
S_reghopmaybe3(U8* s, SSize_t off, const U8* lim)