+# define RMAGIC_SZ sizeof (u_int) /* Overhead at end of bucket */
+# ifdef TWO_POT_OPTIMIZE
+# define MAX_SHORT_BUCKET (12 * BUCKETS_PER_POW2) /* size-1 fits in short */
+# else
+# define MAX_SHORT_BUCKET (13 * BUCKETS_PER_POW2)
+# endif
+#else
+# define RMAGIC_SZ 0
+#endif
+
+#if !defined(PACK_MALLOC) && defined(BUCKETS_ROOT2)
+# undef BUCKETS_ROOT2
+#endif
+
+#ifdef BUCKETS_ROOT2
+# define BUCKET_TABLE_SHIFT 2
+# define BUCKET_POW2_SHIFT 1
+# define BUCKETS_PER_POW2 2
+#else
+# define BUCKET_TABLE_SHIFT MIN_BUC_POW2
+# define BUCKET_POW2_SHIFT 0
+# define BUCKETS_PER_POW2 1
+#endif
+
+#if !defined(MEM_ALIGNBYTES) || ((MEM_ALIGNBYTES > 4) && !defined(STRICT_ALIGNMENT))
+/* Figure out the alignment of void*. */
+/* Helper struct whose layout reveals the alignment requirement of
+ * void*: the offset of member `p` (taken by the ALIGN_SMALL macro
+ * below) equals the padding the compiler inserts after one char,
+ * i.e. the natural alignment of a pointer. Never instantiated. */
+struct aligner {
+  char c;	/* single byte so the next member's offset == its alignment */
+  void *p;	/* member whose offset is the alignment of void* */
+};
+# define ALIGN_SMALL ((int)((caddr_t)&(((struct aligner*)0)->p)))
+#else
+# define ALIGN_SMALL MEM_ALIGNBYTES
+#endif
+
+#define IF_ALIGN_8(yes,no) ((ALIGN_SMALL>4) ? (yes) : (no))
+
+#ifdef BUCKETS_ROOT2
+# define MAX_BUCKET_BY_TABLE 13
+/* Chunk size in bytes for each of the first MAX_BUCKET_BY_TABLE+1
+ * bucket ordinals when BUCKETS_ROOT2 is in effect (two buckets per
+ * power of two).  The first four entries are 0: no bucket that small
+ * exists (presumably below MIN_BUC_POW2 — confirm against callers of
+ * BUCKET_SIZE_NO_SURPLUS/BUCKET_SIZE_REAL, which index this table). */
+static const u_short buck_size[MAX_BUCKET_BY_TABLE + 1] =
+  {
+      0, 0, 0, 0, 4, 4, 8, 12, 16, 24, 32, 48, 64, 80,
+  };
+# define BUCKET_SIZE_NO_SURPLUS(i) ((i) % 2 ? buck_size[i] : (1 << ((i) >> BUCKET_POW2_SHIFT)))
+# define BUCKET_SIZE_REAL(i) ((i) <= MAX_BUCKET_BY_TABLE \
+ ? buck_size[i] \
+ : ((1 << ((i) >> BUCKET_POW2_SHIFT)) \
+ - MEM_OVERHEAD(i) \
+ + POW2_OPTIMIZE_SURPLUS(i)))
+#else
+# define BUCKET_SIZE_NO_SURPLUS(i) (1 << ((i) >> BUCKET_POW2_SHIFT))
+# define BUCKET_SIZE(i) (BUCKET_SIZE_NO_SURPLUS(i) + POW2_OPTIMIZE_SURPLUS(i))
+# define BUCKET_SIZE_REAL(i) (BUCKET_SIZE(i) - MEM_OVERHEAD(i))
+#endif
+
+
+#ifdef PACK_MALLOC
+/* In this case there are several possible layout of arenas depending
+ * on the size. Arenas are of sizes multiple to 2K, 2K-aligned, and
+ * have a size close to a power of 2.
+ *
+ * Arenas of the size >= 4K keep one chunk only. Arenas of size 2K
+ * may keep one chunk or multiple chunks. Here are the possible
+ * layouts of arenas:
+ *
+ * # One chunk only, chunksize 2^k + SOMETHING - ALIGN, k >= 11
+ *
+ * INDEX MAGIC1 UNUSED CHUNK1
+ *
+ * # Multichunk with sanity checking and chunksize 2^k-ALIGN, k>7
+ *
+ * INDEX MAGIC1 MAGIC2 MAGIC3 UNUSED CHUNK1 CHUNK2 CHUNK3 ...
+ *
+ * # Multichunk with sanity checking and size 2^k-ALIGN, k=7
+ *
+ * INDEX MAGIC1 MAGIC2 MAGIC3 UNUSED CHUNK1 UNUSED CHUNK2 CHUNK3 ...
+ *
+ * # Multichunk with sanity checking and size up to 80
+ *
+ * INDEX UNUSED MAGIC1 UNUSED MAGIC2 UNUSED ... CHUNK1 CHUNK2 CHUNK3 ...
+ *
+ * # No sanity check (usually up to 48=byte-long buckets)
+ * INDEX UNUSED CHUNK1 CHUNK2 ...
+ *
+ * Above INDEX and MAGIC are one-byte-long. Sizes of UNUSED are
+ * appropriate to keep algorithms simple and memory aligned. INDEX
+ * encodes the size of the chunk, while MAGICn encodes state (used,
+ * free or non-managed-by-us-so-it-indicates-a-bug) of CHUNKn. MAGIC
+ * is used for sanity checking purposes only. SOMETHING is 0 or 4K
+ * (to make size of big CHUNK accommodate allocations for powers of two
+ * better).
+ *
+ * [There is no need for alignment between chunks, since C rules ensure
+ * that structs which need 2^k alignment have sizeof which is
+ * divisible by 2^k. Thus as far as the last chunk is aligned at the
+ * end of the arena, and 2K-alignment does not contradict things,
+ * everything is going to be OK for sizes of chunks 2^n and 2^n +
+ * 2^k. Say, 80-bit buckets will be 16-bit aligned, and as far as we
+ * put allocations for requests in 65..80 range, all is fine.
+ *
+ * Note, however, that standard malloc() puts more strict
+ * requirements than the above C rules. Moreover, our algorithms of
+ * realloc() may break this idyll, but we suppose that realloc()
+ * need not change alignment.]
+ *
+ * It is very important to make calculation of the offset of MAGICm as
+ * quick as possible, since it is done on each malloc()/free(). In
+ * fact it is so quick that it has quite little effect on the speed of
+ * doing malloc()/free(). [By default] We forego such calculations
+ * for small chunks, but only to save extra 3% of memory, not because
+ * of speed considerations.
+ *
+ * Here is the algorithm [which is the same for all the allocations
+ * schemes above], see OV_MAGIC(block,bucket). Let OFFSETm be the
+ * offset of the CHUNKm from the start of ARENA. Then offset of
+ * MAGICm is (OFFSET1 >> SHIFT) + ADDOFFSET. Here SHIFT and ADDOFFSET
+ * are numbers which depend on the size of the chunks only.
+ *
+ * Let us check some sanity conditions.  Numbers OFFSETm>>SHIFT are
+ * different for all the chunks in the arena if 2^SHIFT is not greater
+ * than size of the chunks in the arena. MAGIC1 will not overwrite
+ * INDEX provided ADDOFFSET is >0 if OFFSET1 < 2^SHIFT. MAGIClast
+ * will not overwrite CHUNK1 if OFFSET1 > (OFFSETlast >> SHIFT) +
+ * ADDOFFSET.
+ *
+ * Make SHIFT the maximal possible (there is no point in making it
+ * smaller). Since OFFSETlast is 2K - CHUNKSIZE, above restrictions
+ * give restrictions on OFFSET1 and on ADDOFFSET.
+ *
+ * In particular, for chunks of size 2^k with k>=6 we can put
+ * ADDOFFSET to be from 0 to 2^k - 2^(11-k), and have
+ * OFFSET1==chunksize. For chunks of size 80 OFFSET1 of 2K%80=48 is
+ * large enough to have ADDOFFSET between 1 and 16 (similarly for 96,
+ * when ADDOFFSET should be 1). In particular, keeping MAGICs for
+ * these sizes gives no additional size penalty.
+ *
+ * However, for chunks of size 2^k with k<=5 this gives OFFSET1 >=
+ * ADDOFFSET + 2^(11-k).  Keeping ADDOFFSET 0 allows for 2^(11-k)-2^(11-2k)
+ * chunks per arena. This is smaller than 2^(11-k) - 1 which are
+ * needed if no MAGIC is kept. [In fact, having a negative ADDOFFSET
+ * would allow for slightly more buckets per arena for k=2,3.]
+ *
+ * Similarly, for chunks of size 3/2*2^k with k<=5 MAGICs would span
+ * the area up to 2^(11-k)+ADDOFFSET.  For k=4 this gives optimal
+ * ADDOFFSET as -7..0. For k=3 ADDOFFSET can go up to 4 (with tiny
+ * savings for negative ADDOFFSET). For k=5 ADDOFFSET can go -1..16
+ * (with no savings for negative values).
+ *
+ * In particular, keeping ADDOFFSET 0 for sizes of chunks up to 2^6
+ * leads to tiny pessimizations in case of sizes 4, 8, 12, 24, and
+ * leads to no contradictions except for size=80 (or 96.)
+ *
+ * However, it also makes sense to keep no magic for sizes 48 or less.
+ * This is what we do. In this case one needs ADDOFFSET>=1 also for
+ * chunksizes 12, 24, and 48, unless one gets one less chunk per
+ * arena.
+ *
+ * The algo of OV_MAGIC(block,bucket) keeps ADDOFFSET 0 until
+ * chunksize of 64, then makes it 1.
+ *
+ * This allows for an additional optimization: the above scheme leads
+ * to giant overheads for sizes 128 or more (one whole chunk needs to
+ * be sacrificed to keep INDEX).  Instead we use chunks not of size
+ * 2^k, but of size 2^k-ALIGN. If we pack these chunks at the end of
+ * the arena, then the beginnings are still in different 2^k-long
+ * sections of the arena if k>=7 for ALIGN==4, and k>=8 if ALIGN=8.
+ * Thus for k>7 the above algo of calculating the offset of the magic
+ * will still give different answers for different chunks. And to
+ * avoid the overrun of MAGIC1 into INDEX, one needs ADDOFFSET of >=1.
+ * In the case k=7 we just move the first chunk an extra ALIGN
+ * backward inside the ARENA (this is done once per arena lifetime,
+ * thus is not a big overhead). */
+# define MAX_PACKED_POW2 6
+# define MAX_PACKED (MAX_PACKED_POW2 * BUCKETS_PER_POW2 + BUCKET_POW2_SHIFT)
+# define MAX_POW2_ALGO ((1<<(MAX_PACKED_POW2 + 1)) - M_OVERHEAD)
+# define TWOK_MASK ((1<<LOG_OF_MIN_ARENA) - 1)
+# define TWOK_MASKED(x) (PTR2UV(x) & ~TWOK_MASK)
+# define TWOK_SHIFT(x) (PTR2UV(x) & TWOK_MASK)
+# define OV_INDEXp(block) (INT2PTR(u_char*,TWOK_MASKED(block)))
+# define OV_INDEX(block) (*OV_INDEXp(block))
+# define OV_MAGIC(block,bucket) (*(OV_INDEXp(block) + \
+ (TWOK_SHIFT(block)>> \
+ (bucket>>BUCKET_POW2_SHIFT)) + \
+ (bucket >= MIN_NEEDS_SHIFT ? 1 : 0)))
+	/* A bucket can have a shift smaller than its size; we need to
+	   shift its magic number so it will not overwrite index: */
+# ifdef BUCKETS_ROOT2
+# define MIN_NEEDS_SHIFT (7*BUCKETS_PER_POW2 - 1) /* Shift 80 greater than chunk 64. */
+# else
+# define MIN_NEEDS_SHIFT (7*BUCKETS_PER_POW2) /* Shift 128 greater than chunk 32. */
+# endif
+# define CHUNK_SHIFT 0
+
+/* Number of active buckets of given ordinal. */
+#ifdef IGNORE_SMALL_BAD_FREE
+#define FIRST_BUCKET_WITH_CHECK (6 * BUCKETS_PER_POW2) /* 64 */
+# define N_BLKS(bucket) ( (bucket) < FIRST_BUCKET_WITH_CHECK \
+ ? ((1<<LOG_OF_MIN_ARENA) - 1)/BUCKET_SIZE_NO_SURPLUS(bucket) \
+ : n_blks[bucket] )
+#else
+# define N_BLKS(bucket) n_blks[bucket]
+#endif
+
+/* Chunks that fit in one minimal (2^LOG_OF_MIN_ARENA-byte) arena, per
+ * bucket ordinal.  Read through the N_BLKS() macro above; when
+ * IGNORE_SMALL_BAD_FREE is defined, ordinals below
+ * FIRST_BUCKET_WITH_CHECK are computed in N_BLKS() instead of being
+ * read from this table.  Leading zero entries correspond to bucket
+ * ordinals below the minimum bucket size. */
+static const u_short n_blks[LOG_OF_MIN_ARENA * BUCKETS_PER_POW2] =
+  {
+# if BUCKETS_PER_POW2==1
+      0, 0,
+      (MIN_BUC_POW2==2 ? 384 : 0),
+      224, 120, 62, 31, 16, 8, 4, 2
+# else
+      0, 0, 0, 0,
+      (MIN_BUC_POW2==2 ? 384 : 0), (MIN_BUC_POW2==2 ? 384 : 0), /* 4, 4 */
+      224, 149, 120, 80, 62, 41, 31, 25, 16, 16, 8, 8, 4, 4, 2, 2
+# endif
+  };
+
+/* Shift of the first bucket with the given ordinal inside 2K chunk. */
+#ifdef IGNORE_SMALL_BAD_FREE
+# define BLK_SHIFT(bucket) ( (bucket) < FIRST_BUCKET_WITH_CHECK \
+ ? ((1<<LOG_OF_MIN_ARENA) \
+ - BUCKET_SIZE_NO_SURPLUS(bucket) * N_BLKS(bucket)) \
+ : blk_shift[bucket])