diff --git a/crates/ironcache-bench/src/lib.rs b/crates/ironcache-bench/src/lib.rs index 9c8cd26..f874915 100644 --- a/crates/ironcache-bench/src/lib.rs +++ b/crates/ironcache-bench/src/lib.rs @@ -22,13 +22,14 @@ //! all three so bytes-per-key is reported per class: //! //! - [`EncodingClass::Int`]: a canonical i64 in decimal (e.g. `"12345"`). Stored -//! with NO value heap allocation (the integer lives inline in the object). -//! Note its per-key footprint equals embstr's: the stored-value enum is sized -//! for its largest inline variant, so int saves a heap allocation but not slot -//! bytes. `OBJECT ENCODING` -> `int`. +//! inline in the per-key object's value enum with NO value heap allocation (the +//! `int` variant is the only string-family variant that does not box its bytes). +//! `OBJECT ENCODING` -> `int`. //! - [`EncodingClass::EmbStr`]: a short string at or below the embstr threshold -//! (44 bytes, [`ironcache_store::encoding::EMBSTR_THRESHOLD`]), stored inline in -//! the object (SSO). `OBJECT ENCODING` -> `embstr`. +//! (44 bytes, [`ironcache_store::encoding::EMBSTR_THRESHOLD`]), stored in a single +//! boxed value allocation (memory Round 2 shrank the per-key slot by boxing the +//! embstr bytes rather than carrying a fixed inline buffer). `OBJECT ENCODING` -> +//! `embstr`. //! - [`EncodingClass::Raw`]: a longer string stored out-of-line (a separate heap //! allocation). `OBJECT ENCODING` -> `raw`. diff --git a/crates/ironcache-server/src/dispatch.rs b/crates/ironcache-server/src/dispatch.rs index 3be18b5..2f8f34d 100644 --- a/crates/ironcache-server/src/dispatch.rs +++ b/crates/ironcache-server/src/dispatch.rs @@ -2372,7 +2372,7 @@ mod tests { ttl_present: false, snapshot_version: 0, }; - obj.value = ValueRepr::Inline(ironcache_store::kvobj::InlineBuf::from_bytes(b"x")); + obj.value = ValueRepr::Inline(Box::from(&b"x"[..])); st.insert_object(0, obj); // GET / STRLEN / GETSET against the non-string -> WRONGTYPE. 
@@ -2421,7 +2421,7 @@ mod tests { ttl_present: false, snapshot_version: 0, }; - obj.value = ValueRepr::Inline(ironcache_store::kvobj::InlineBuf::from_bytes(b"x")); + obj.value = ValueRepr::Inline(Box::from(&b"x"[..])); st.insert_object(0, obj); // MGET str missing lst -> [bulk("hi"), Null, Null]. The non-string yields Null, @@ -2726,7 +2726,7 @@ mod tests { ttl_present: false, snapshot_version: 0, }; - obj.value = ValueRepr::Inline(ironcache_store::kvobj::InlineBuf::from_bytes(b"x")); + obj.value = ValueRepr::Inline(Box::from(&b"x"[..])); st.insert_object(0, obj); assert_eq!( err_line(run_on(&c, &mut s, &mut st, t, &[b"INCR", b"lst"])), @@ -2769,7 +2769,7 @@ mod tests { ttl_present: false, snapshot_version: 0, }; - obj.value = ValueRepr::Inline(ironcache_store::kvobj::InlineBuf::from_bytes(b"x")); + obj.value = ValueRepr::Inline(Box::from(&b"x"[..])); st.insert_object(0, obj); assert_eq!( err_line(run_on( @@ -4196,7 +4196,7 @@ mod tests { ttl_present: false, snapshot_version: 0, }; - obj.value = ValueRepr::Inline(ironcache_store::kvobj::InlineBuf::from_bytes(b"x")); + obj.value = ValueRepr::Inline(Box::from(&b"x"[..])); st.insert_object(0, obj); match run_on_wheel(&c, &mut s, &mut st, &mut wheel, t, &[b"GETEX", b"lst"]) { Value::Error(e) => assert_eq!( diff --git a/crates/ironcache-store/src/kvobj.rs b/crates/ironcache-store/src/kvobj.rs index af08d41..e5faa63 100644 --- a/crates/ironcache-store/src/kvobj.rs +++ b/crates/ironcache-store/src/kvobj.rs @@ -24,7 +24,7 @@ //! command layer. The folded-header metadata (below) is laid out as the FAM //! version will pack it, so that follow-up is a representation change only. -use crate::encoding::{Classified, EMBSTR_THRESHOLD, classify}; +use crate::encoding::{Classified, classify}; use crate::scan_hash; use bytes::Bytes; use hashbrown::HashMap; @@ -41,12 +41,6 @@ use ironcache_storage::{ }; use std::collections::{BTreeSet, VecDeque}; -/// The inline-value buffer capacity (embstr). 
Matches [`EMBSTR_THRESHOLD`]; a -/// value classified as embstr fits here without a separate allocation in the -/// eventual FAM layout. In the safe rep it is a fixed-size inline array plus a -/// length, so an embstr value adds no heap allocation beyond the `KvObj` itself. -pub const INLINE_CAP: usize = EMBSTR_THRESHOLD; - /// The packed per-key header (OBJECT_LAYOUT.md "packed header and metadata bits"). /// /// In the eventual FAM layout these fields are bit-packed into a few bytes (type + @@ -101,66 +95,53 @@ impl Header { } } -/// A small inline string buffer ([`INLINE_CAP`] bytes plus a length), the safe-rep -/// stand-in for the FAM inline-value region. An embstr value lives here with no -/// extra heap allocation. -#[derive(Debug, Clone)] -pub struct InlineBuf { - buf: [u8; INLINE_CAP], - len: u8, -} - -impl InlineBuf { - /// Build from bytes that are known to fit ([`INLINE_CAP`]). Panics if too long; - /// callers (the classifier path) only construct this for embstr-sized values. - #[must_use] - pub fn from_bytes(bytes: &[u8]) -> Self { - assert!( - bytes.len() <= INLINE_CAP, - "InlineBuf overflow: {} > {INLINE_CAP}", - bytes.len() - ); - let mut buf = [0u8; INLINE_CAP]; - buf[..bytes.len()].copy_from_slice(bytes); - InlineBuf { - buf, - len: bytes.len() as u8, - } - } - - /// The inline bytes. - #[must_use] - pub fn as_bytes(&self) -> &[u8] { - &self.buf[..self.len as usize] - } -} - /// The value representation inside a [`KvObj`] (ENCODINGS.md #112). #[derive(Debug, Clone)] pub enum ValueRepr { /// An int-encoded value: the raw i64, NO value allocation (the decimal bytes /// are materialized on read). `OBJECT ENCODING` -> int. Int(i64), - /// A short string stored inline. `OBJECT ENCODING` -> embstr. - Inline(InlineBuf), + /// A short string (embstr). `OBJECT ENCODING` -> embstr. 
+ /// + /// BOXED (memory Round 2): the bytes live behind a `Box<[u8]>` rather than a + /// fixed inline buffer, so this variant is one fat pointer (16 B) wide and the largest + /// `ValueRepr` variant shrinks to a `Box<[u8]>`, cutting every per-key `KvObj` + /// and the hashbrown table slot. The embstr-vs-raw distinction is the SAME (it is + /// recorded in [`Header::encoding`], NOT by the variant): a value classified as + /// embstr by [`crate::encoding::EMBSTR_THRESHOLD`] is `Inline`, a longer one is + /// [`ValueRepr::Raw`], + /// and `OBJECT ENCODING` reports `embstr` / `raw` exactly as before. Redis/Valkey + /// also heap-allocate the object body, so this is allocation-parity with redis + /// plus a smaller slot. + Inline(Box<[u8]>), /// A long string stored out-of-line. `OBJECT ENCODING` -> raw. Raw(Box<[u8]>), /// A LIST value (PR-5). `OBJECT ENCODING` -> `listpack` while small, `quicklist` /// once over the threshold (a pure function of the active repr, #40). - List(ListVal), + /// + /// BOXED (memory Round 1): the four collection structs are the large `ValueRepr` + /// variants (`ListVal` 40 / `HashVal` 40 / `SetVal` 48 / `ZSetVal` 64). Holding them + /// behind a `Box` drops `ValueRepr` to the string-variant bound, which shrinks every + /// per-key `KvObj` and the hashbrown table slot. Round 1 left the string/int + /// variants (`Int`/`Inline`/`Raw`) as they were; `Inline` was boxed later, in Round 2. The collections + /// already heap-allocate their contents, so the `Box` is a negligible extra + /// indirection only on collection ops. + List(Box), /// A HASH value (PR-6). `OBJECT ENCODING` -> `listpack` while small, `hashtable` /// once over the entry-count OR per-element-byte threshold (a pure function of the - /// active repr, #40). - Hash(HashVal), + /// active repr, #40). BOXED (memory Round 1); see [`ValueRepr::List`]. + Hash(Box), /// A SET value (PR-7). 
`OBJECT ENCODING` -> `intset` while all-integer and small, /// `listpack` once a non-integer member is added (and still small), `hashtable` once /// over the entry-count OR per-member-byte threshold (a pure function of the active - /// repr, #40). The conversion is ONE-WAY (never demotes). - Set(SetVal), + /// repr, #40). The conversion is ONE-WAY (never demotes). BOXED (memory Round 1); + /// see [`ValueRepr::List`]. + Set(Box), /// A ZSET (sorted set) value (PR-8). `OBJECT ENCODING` -> `listpack` while small, /// `skiplist` once over the entry-count OR per-member-byte threshold (a pure function - /// of the active repr, #40). The conversion is ONE-WAY (never demotes). - ZSet(ZSetVal), + /// of the active repr, #40). The conversion is ONE-WAY (never demotes). BOXED + /// (memory Round 1); see [`ValueRepr::List`]. + ZSet(Box), } impl ValueRepr { @@ -187,8 +168,9 @@ impl ValueRepr { pub fn logical_len(&self) -> usize { match self { ValueRepr::Int(n) => int_decimal_len(*n), - ValueRepr::Inline(b) => b.as_bytes().len(), - ValueRepr::Raw(b) => b.len(), + // Embstr and raw both hold the value bytes behind a `Box<[u8]>`; the + // embstr-vs-raw distinction lives in `Header.encoding`, not the variant. 
+ ValueRepr::Inline(b) | ValueRepr::Raw(b) => b.len(), ValueRepr::List(l) => l.element_bytes(), ValueRepr::Hash(h) => h.element_bytes(), ValueRepr::Set(s) => s.element_bytes(), @@ -1714,7 +1696,7 @@ impl KvObj { ) -> Self { let value = match classified { Classified::Int(n) => ValueRepr::Int(n), - Classified::EmbStr => ValueRepr::Inline(InlineBuf::from_bytes(bytes)), + Classified::EmbStr => ValueRepr::Inline(bytes.to_vec().into_boxed_slice()), Classified::Raw => ValueRepr::Raw(bytes.to_vec().into_boxed_slice()), }; let header = Header::new(value.encoding(), expire_at.is_some()); @@ -1800,7 +1782,7 @@ impl KvObj { KvObj { header: Header::with_type(DataType::List, encoding, expire_at.is_some()), key: key.to_vec().into_boxed_slice(), - value: ValueRepr::List(list), + value: ValueRepr::List(Box::new(list)), expire_at, } } @@ -1814,7 +1796,7 @@ impl KvObj { KvObj { header: Header::with_type(DataType::Hash, encoding, expire_at.is_some()), key: key.to_vec().into_boxed_slice(), - value: ValueRepr::Hash(hash), + value: ValueRepr::Hash(Box::new(hash)), expire_at, } } @@ -1828,7 +1810,7 @@ impl KvObj { KvObj { header: Header::with_type(DataType::Set, encoding, expire_at.is_some()), key: key.to_vec().into_boxed_slice(), - value: ValueRepr::Set(set), + value: ValueRepr::Set(Box::new(set)), expire_at, } } @@ -1842,7 +1824,7 @@ impl KvObj { KvObj { header: Header::with_type(DataType::ZSet, encoding, expire_at.is_some()), key: key.to_vec().into_boxed_slice(), - value: ValueRepr::ZSet(zset), + value: ValueRepr::ZSet(Box::new(zset)), expire_at, } } @@ -1861,7 +1843,9 @@ impl KvObj { /// yields `None` -> WRONGTYPE). pub fn as_list_mut(&mut self) -> Option<&mut ListVal> { match &mut self.value { - ValueRepr::List(l) => Some(l), + // Deref through the `Box` (memory Round 1) to the `&mut ListVal` the + // collection trait + in-place RMW path expect. + ValueRepr::List(l) => Some(&mut **l), _ => None, } } @@ -1871,7 +1855,7 @@ impl KvObj { /// `None` -> WRONGTYPE). 
The HASH analog of [`Self::as_list_mut`]. pub fn as_hash_mut(&mut self) -> Option<&mut HashVal> { match &mut self.value { - ValueRepr::Hash(h) => Some(h), + ValueRepr::Hash(h) => Some(&mut **h), _ => None, } } @@ -1881,7 +1865,7 @@ impl KvObj { /// -> WRONGTYPE). The SET analog of [`Self::as_list_mut`]/[`Self::as_hash_mut`]. pub fn as_set_mut(&mut self) -> Option<&mut SetVal> { match &mut self.value { - ValueRepr::Set(s) => Some(s), + ValueRepr::Set(s) => Some(&mut **s), _ => None, } } @@ -1892,7 +1876,7 @@ impl KvObj { /// [`Self::as_set_mut`]. pub fn as_zset_mut(&mut self) -> Option<&mut ZSetVal> { match &mut self.value { - ValueRepr::ZSet(z) => Some(z), + ValueRepr::ZSet(z) => Some(&mut **z), _ => None, } } @@ -1972,7 +1956,7 @@ impl KvObj { pub fn set_value_bytes(&mut self, bytes: &[u8]) { self.value = match classify(bytes) { Classified::Int(n) => ValueRepr::Int(n), - Classified::EmbStr => ValueRepr::Inline(InlineBuf::from_bytes(bytes)), + Classified::EmbStr => ValueRepr::Inline(bytes.to_vec().into_boxed_slice()), Classified::Raw => ValueRepr::Raw(bytes.to_vec().into_boxed_slice()), }; self.header.encoding = self.value.encoding(); diff --git a/crates/ironcache-store/src/lib.rs b/crates/ironcache-store/src/lib.rs index f5b7df5..25feef3 100644 --- a/crates/ironcache-store/src/lib.rs +++ b/crates/ironcache-store/src/lib.rs @@ -576,13 +576,9 @@ impl ShardStore { kvobj::ValueRepr::Int(n) => { ValueRef::from_int_bytes(obj.header.data_type, obj.expire_at, int_decimal_bytes(*n)) } - kvobj::ValueRepr::Inline(b) => ValueRef::borrowed( - obj.header.data_type, - obj.header.encoding, - obj.expire_at, - b.as_bytes(), - ), - kvobj::ValueRepr::Raw(b) => { + // Embstr and raw both borrow their bytes the same way; the embstr-vs-raw + // distinction is carried by `obj.header.encoding`, not the variant. 
+ kvobj::ValueRepr::Inline(b) | kvobj::ValueRepr::Raw(b) => { ValueRef::borrowed(obj.header.data_type, obj.header.encoding, obj.expire_at, b) } // A LIST/HASH/SET is not byte-readable as a string: the command layer only @@ -612,13 +608,9 @@ impl ShardStore { obj.expire_at, int_decimal_bytes(*n), ), - kvobj::ValueRepr::Inline(b) => OccupiedEntry::borrowed( - obj.header.data_type, - obj.header.encoding, - obj.expire_at, - b.as_bytes(), - ), - kvobj::ValueRepr::Raw(b) => { + // Embstr and raw both borrow their bytes the same way; the embstr-vs-raw + // distinction is carried by `obj.header.encoding`, not the variant. + kvobj::ValueRepr::Inline(b) | kvobj::ValueRepr::Raw(b) => { OccupiedEntry::borrowed(obj.header.data_type, obj.header.encoding, obj.expire_at, b) } // A LIST/HASH/SET observed through the READ-ONLY rmw arm (e.g. a numeric RMW @@ -823,17 +815,20 @@ impl Store for ShardStore { // would each take and drop a fresh `&mut` and obscure the dispatch) so each // collection type maps to exactly one arm. let entry = match &mut obj.value { + // The collection variants are boxed (memory Round 1); deref through the + // `Box` (`&mut **`) to the concrete `&mut *Val`, which then coerces to the + // `&mut dyn *Value` trait object the typed view constructors take. 
kvobj::ValueRepr::List(l) => { - RmwEntry::OccupiedMut(OccupiedEntryMut::list(encoding, expire_at, l)) + RmwEntry::OccupiedMut(OccupiedEntryMut::list(encoding, expire_at, &mut **l)) } kvobj::ValueRepr::Hash(h) => { - RmwEntry::OccupiedMut(OccupiedEntryMut::hash(encoding, expire_at, h)) + RmwEntry::OccupiedMut(OccupiedEntryMut::hash(encoding, expire_at, &mut **h)) } kvobj::ValueRepr::Set(s) => { - RmwEntry::OccupiedMut(OccupiedEntryMut::set(encoding, expire_at, s)) + RmwEntry::OccupiedMut(OccupiedEntryMut::set(encoding, expire_at, &mut **s)) } kvobj::ValueRepr::ZSet(z) => { - RmwEntry::OccupiedMut(OccupiedEntryMut::zset(encoding, expire_at, z)) + RmwEntry::OccupiedMut(OccupiedEntryMut::zset(encoding, expire_at, &mut **z)) } kvobj::ValueRepr::Int(_) | kvobj::ValueRepr::Inline(_) diff --git a/crates/ironcache-store/tests/keyspace.rs b/crates/ironcache-store/tests/keyspace.rs index db03b39..85ded52 100644 --- a/crates/ironcache-store/tests/keyspace.rs +++ b/crates/ironcache-store/tests/keyspace.rs @@ -13,7 +13,7 @@ use ironcache_storage::{ UnixMillis, }; use ironcache_store::ShardStore; -use ironcache_store::kvobj::{Header, InlineBuf, KvObj, ValueRepr}; +use ironcache_store::kvobj::{Header, KvObj, ValueRepr}; use std::collections::HashSet; const NOW: UnixMillis = UnixMillis(1_000); @@ -256,7 +256,7 @@ fn scan_type_filter_selects_by_data_type() { ttl_present: false, snapshot_version: 0, }; - lst.value = ValueRepr::Inline(InlineBuf::from_bytes(b"x")); + lst.value = ValueRepr::Inline(Box::from(&b"x"[..])); s.insert_object(0, lst); let mut out = Vec::new(); diff --git a/docs/bench/OPTIMIZATION_LOG.md b/docs/bench/OPTIMIZATION_LOG.md new file mode 100644 index 0000000..7ce774f --- /dev/null +++ b/docs/bench/OPTIMIZATION_LOG.md @@ -0,0 +1,132 @@ + + +# IronCache optimization log (target: beat redis 8.8.0) + +The running tally of efficiency optimizations: the approach, the hypothesis, what +the measurement said, and KEPT or REVERTED. 
The goal is to be a CLEAR winner over +redis 8.8.0 on BOTH memory (bytes-per-key) and speed (get/set throughput + +latency). Focus is Redis first; the others follow. + +Rule against tunnel vision: if the same algorithmic approach fails to move the +needle ~10 times, abandon it and try a structurally different one. + +## Measurement honesty + +- **Memory (bytes-per-key)** is measured as the INFO `used_memory` delta over a + deterministic N-key populate (scripts/bench/headtohead.sh) and via the + allocator-true `memmodel` (A1). It is RELIABLE on any box (not contention + sensitive). This is the metric we ratchet hardest. +- **Op-level speed** is measured by the criterion micro-benches (in-process, + reliable): RESP codec, hashtable probe/insert. These are not contention bound. +- **Throughput (closed-loop QPS)** on this unpinned macOS dev box is + CONTENTION-BOUND (the load generator shares cores with the server), so absolute + QPS vs redis is INDICATIVE only; the authoritative throughput verdict needs a + pinned Linux run (A3/A4 are ready for it). We track relative QPS changes here. + +## Baseline (2026-06-16, IronCache 0.0.0 vs redis-server 8.8.0, unpinned macOS, 300k keys, 128B values) + +| metric | IronCache | redis 8.8.0 | ratio | verdict | +| --- | ---: | ---: | ---: | --- | +| bytes-per-key | 526.7 | 218.6 | 2.41x heavier | LOSE (memory) | +| qps (closed, contention-bound) | 71.4k | 140.8k | 0.51x | LOSE (indicative) | +| open-loop p50 | 1005 us | 1009 us | ~parity | tie | +| open-loop p99 | 2647 us | 74175 us | 0.04x | WIN (latency) | + +## Where the per-key memory goes (sizeof, measured) + +- hashbrown slot `(Box<[u8]>, KvObj)` = **128 B** -> the table bucket array (at + 7/8 load) costs ~146 B/key. This is the dominant structural overhead vs Redis's + pointer-sized dict slot. +- `KvObj` = 112 B = Header(8) + key `Box<[u8]>`(16, a SEPARATE key allocation) + + `ValueRepr`(72) + the `expire_at` `Option`(16). 
+- `ValueRepr` = 72 B, sized for its largest variants: `InlineBuf`(45, the embstr + SSO buffer) and `ZSetVal`(64). A string/int value uses <= 16 B of it, so ~56 B + is reserved-but-unused per key. +- Per key for a 128 B value there are ~3 allocations (key, value, and the + amortized table bucket) vs Redis's ~1 (kvobj packs key+value+ttl into one + allocation behind a dict pointer). + +## Lever list (highest expected memory impact first) + +- **L-FAM (endgame): single-allocation kvobj** (OBJECT_LAYOUT.md): pack + header+key+value into ONE allocation behind a thin slot, like Redis kvobj / + Valkey embedded key. Biggest win; needs unsafe (forbidden today) or a careful + safe single-Box layout. Large effort. +- **L-VR: shrink ValueRepr** by boxing the inline buffer + collections so the enum + is ~16 B (tag + i64/ptr). KvObj 112 -> ~56, slot 128 -> ~72. Removes the ~56 B + reserved waste. Tradeoff: boxing the embstr SSO buffer adds an allocation for + short strings (a possible speed cost) - measure both. +- **L-COLL: box only the collection variants** (List/Hash/Set/ZSet). Safe, keeps + the embstr SSO (speed), bounded by InlineBuf(45): ValueRepr 72 -> ~48, slot + 128 -> ~104. Small (~20 B/key) but zero hot-path risk. (Round 1.) +- **L-IDX: a denser index** (Dragonfly-style Dashtable: extendible hashing, far + less per-entry metadata than a Swiss table at high load). Structural table win; + large. +- **L-LF: load-factor / sizing tuning.** Cheap, bounded; only after the slot size + is settled. + +## Rounds + +| # | Approach | Hypothesis | Memory result | Speed result | Verdict | +| --- | --- | --- | --- | --- | --- | +| 1 | L-COLL: box List/Hash/Set/ZSet variants | ValueRepr 72->48, slot 128->104, ~20 B/key | bytes/key 526.7 -> 421.86 (-20%; gap 2.41x -> 1.93x). 
memmodel table slack 209.7 -> 146.8 | qps 71.4k -> 77.9k (+9%, smaller slot = better cache density) | **KEPT** - improved BOTH, zero behavior change (all tests green), SSO preserved | +| 2 | L-VR: box the embstr inline buffer (Inline(InlineBuf) -> Inline(Box<[u8]>)) | ValueRepr 48->24, slot 104->80, more table savings | bytes/key (128B) 421.86 -> 386.85 (gap 1.93x -> 1.77x); table slack 146.8 -> 125.8; embstr total 177 -> 172 | qps ~77.6k (flat) | **KEPT** - improved 128B memory; allocation-parity with redis (which also heap-allocs the object) | + +### KEY STRUCTURAL FINDING (after rounds 1-2) + +The SMALL-value case exposes the real wall. At 32B values: IronCache 291 vs redis +101 bytes/key = 2.88x. redis 8.8's kvobj packs key+value+ttl into ONE allocation +(~69 B overhead) behind a pointer-sized dict slot. IronCache makes ~3 allocations +per key (the key Box, the value Box, and the table bucket carrying a 64 B object in an 80 B +slot) AND duplicates the key (the hashbrown map key and the `KvObj` key are separate copies; nothing shares them). Safe +field-shrinks (rounds 1-2) cannot close this; the per-key FIXED overhead is +structural. THE LEVER: a SINGLE-ALLOCATION entry holding header+key+value in one +Box<[u8]> blob, in a key-dedup table (hashbrown's low-level HashTable, hashing the +key slice inside the blob), so a string key is ONE allocation and a pointer-sized +slot, like Redis/Dragonfly. This is SAFE (Box<[u8]> slicing, no unsafe), so it +does NOT need an unsafe/ADR decision; it is a large store-core rewrite. Collections +stay boxed structs (not flat blobs). Scoped as Round 3 (the big one). Micro-tweaks +(u64 TTL sentinel, inline short keys) are deliberately SKIPPED because the +single-alloc rewrite subsumes them (no tunnel vision on soon-replaced changes). 
+ +### Round 3 (next, the big one): single-allocation blob entry - VALIDATED design + +Research (redis 8.2 kvobj, valkey 8.0/8.1, Dragonfly Dashtable, hashbrown, +SwissTable/Dash/MemC3/F14 papers) confirms the lever and a SAFE Rust path: + +- **Table:** `hashbrown::HashTable` (the low-level explicit-hash API, what + IndexMap uses), with caller-supplied `hash`/`eq` closures that read the key + slice from INSIDE the entry. The table stores ONLY the entry handle and does + NOT duplicate the key. Empirically compiles under `#![forbid(unsafe_code)]` + (hashbrown's unsafe is encapsulated; we call only its safe API). hashbrown + HashTable since 0.14.2, MSRV 1.85 - matches ours; we already depend on it. +- **Entry:** a THIN-pointer single allocation (8 B slot, not Box's 16 B fat + pointer): `triomphe::ThinArc` (header can cache the u64 hash to + avoid re-hash on resize; refcounted, copy-on-write writes) OR + `thin-vec::ThinVec` (unique ownership, in-place growth). Layout + `[packed header | (ttl u64) | key_len | key | value]`, key BEFORE value (key is + immutable), value inlined when header+key+value fits an allocator bin (our + embstr analogue), else an out-of-line value pointer. Mirrors redis kvobj exactly. +- **Collections** (List/Hash/Set/ZSet, structured) stay boxed structs referenced + by the entry's value pointer; the entry still collapses the KEY into one + key+header allocation (the main win). +- **Expected:** 3 allocations/key -> 1 (short strings) or 2 (long/collections), + an 8 B slot, and NO key duplication -> the redis 8.2 / valkey 8.1 regime + (~20-30 B/key overhead), closing the small-value gap (today 2.88x at 32B). 
+- **Risks:** safe bounds-checked blob parsing on every access (keep in one + property-tested module); ThinArc writes are copy-on-write (rebuild blob, redis + also reallocs); cache the hash in the header to avoid resize re-hash; embedding + TTL drops the separate expires index (decide active-expiry: scan or a secondary + light index); hashbrown's post-doubling trough (~39 B/entry) is the one spot a + future Dash table would beat - note, do not block. +- This is a large ironcache-store core rewrite (the entry rep + the table + + the primitives behind the frozen Store waist), staged and gated by the A5 + perf-gate + the full test suite. Sources logged in the research transcript. + +### Round 1 detail +Boxed `ValueRepr::{List,Hash,Set,ZSet}` (kvobj.rs) + the rmw dispatch / accessors +(lib.rs); 2 files, ~13 sites, all tests green, sizeof KvObj 112->88, ValueRepr +72->48, slot 128->104. Win was larger than predicted (~105 B/key not ~20) because +the table-bucket-array slack scales with slot size and compounds at the load +factor. Next: the slot is still 104 B; the InlineBuf(45) is now the ValueRepr +bound and the `expire_at` `Option`(16) is reserved per key. Round 2 targets those.