ret = newSVpvs_flags("\\[$@%*]$@", SVs_TEMP);
goto set;
}
+ if (code == -KEY___FILE__ || code == -KEY___LINE__
+ || code == -KEY___PACKAGE__) {
+ ret = newSVpvs_flags("", SVs_TEMP);
+ goto set;
+ }
if (code == -KEY_readpipe) {
s = "CORE::backtick";
}
{
dVAR; dSP; dPOPss;
register unsigned char *s;
- register I32 pos;
- register I32 ch;
- register I32 *sfirst;
- register I32 *snext;
+ char *sfirst_raw;
STRLEN len;
+ MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_study) : NULL;
+ U8 quanta;
+ STRLEN size;
+
+ if (mg && SvSCREAM(sv))
+ RETPUSHYES;
- if (sv == PL_lastscream) {
- if (SvSCREAM(sv))
- RETPUSHYES;
- }
s = (unsigned char*)(SvPV(sv, len));
if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) {
/* No point in studying a zero length string, and not safe to study
stringification. Also refuse to study an FBM scalar, as this gives
more flexibility in SV flag usage. No real-world code would ever
end up studying an FBM scalar, so this isn't a real pessimisation.
+ Endemic use of I32 in Perl_screaminstr makes it hard to safely push
+ the study length limit from I32_MAX to U32_MAX - 1.
*/
RETPUSHNO;
}
- pos = len;
- if (PL_lastscream) {
- SvSCREAM_off(PL_lastscream);
- SvREFCNT_dec(PL_lastscream);
- }
- PL_lastscream = SvREFCNT_inc_simple(sv);
+ if (len < 0xFF) {
+ quanta = 1;
+ } else if (len < 0xFFFF) {
+ quanta = 2;
+ } else
+ quanta = 4;
- if (pos > PL_maxscream) {
- if (PL_maxscream < 0) {
- PL_maxscream = pos + 80;
- Newx(PL_screamfirst, 256, I32);
- Newx(PL_screamnext, PL_maxscream, I32);
- }
- else {
- PL_maxscream = pos + pos / 4;
- Renew(PL_screamnext, PL_maxscream, I32);
- }
- }
-
- sfirst = PL_screamfirst;
- snext = PL_screamnext;
+ size = (256 + len) * quanta;
+ sfirst_raw = (char *)safemalloc(size);
- if (!sfirst || !snext)
+ if (!sfirst_raw)
DIE(aTHX_ "do_study: out of memory");
- for (ch = 256; ch; --ch)
- *sfirst++ = -1;
- sfirst -= 256;
-
- while (--pos >= 0) {
- register const I32 ch = s[pos];
- if (sfirst[ch] >= 0)
- snext[pos] = sfirst[ch] - pos;
- else
- snext[pos] = -pos;
- sfirst[ch] = pos;
+ SvSCREAM_on(sv);
+ if (!mg)
+ mg = sv_magicext(sv, NULL, PERL_MAGIC_study, &PL_vtbl_regexp, NULL, 0);
+ mg->mg_ptr = sfirst_raw;
+ mg->mg_len = size;
+ mg->mg_private = quanta;
+
+ memset(sfirst_raw, ~0, 256 * quanta);
+
+ /* The assumption here is that most studied strings are fairly short, hence
+ the pain of the extra code is worth it, given the memory savings.
+ 80 character string, 336 bytes as U8, down from 1344 as U32
+ 800 character string, 2112 bytes as U16, down from 4224 as U32
+ */
+
+ if (quanta == 1) {
+ U8 *const sfirst = (U8 *)sfirst_raw;
+ U8 *const snext = sfirst + 256;
+ while (len-- > 0) {
+ const U8 ch = s[len];
+ snext[len] = sfirst[ch];
+ sfirst[ch] = len;
+ }
+ } else if (quanta == 2) {
+ U16 *const sfirst = (U16 *)sfirst_raw;
+ U16 *const snext = sfirst + 256;
+ while (len-- > 0) {
+ const U8 ch = s[len];
+ snext[len] = sfirst[ch];
+ sfirst[ch] = len;
+ }
+ } else {
+ U32 *const sfirst = (U32 *)sfirst_raw;
+ U32 *const snext = sfirst + 256;
+ while (len-- > 0) {
+ const U8 ch = s[len];
+ snext[len] = sfirst[ch];
+ sfirst[ch] = len;
+ }
}
- SvSCREAM_on(sv);
- /* piggyback on m//g magic */
- sv_magic(sv, NULL, PERL_MAGIC_regex_global, NULL, 0);
RETPUSHYES;
}