diff --git a/trie/bintrie/pack.go b/trie/bintrie/pack.go
new file mode 100644
index 0000000000..84c8efb8f4
--- /dev/null
+++ b/trie/bintrie/pack.go
@@ -0,0 +1,78 @@
+// Copyright 2026 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package bintrie
+
+import (
+ "encoding/binary"
+
+ "github.com/holiman/uint256"
+)
+
+// PackBasicData encodes an account's basic metadata (code size, nonce,
+// balance) into the 32-byte BasicData leaf value defined by EIP-7864.
+//
+// The canonical spec layout is:
+//
+// byte 0 version (currently always 0, left as the implicit zero)
+// bytes 1..4 reserved
+// bytes 5..7 code_size (big-endian, 3 bytes, max 2^24-1)
+// bytes 8..15 nonce (big-endian, 8 bytes)
+// bytes 16..31 balance (big-endian, right-justified, 16 bytes)
+//
+// For historical reasons the existing BinaryTrie implementation writes
+// code_size as a 4-byte big-endian uint32 starting at byte 4 rather than a
+// 3-byte big-endian field starting at byte 5. Byte 4 is reserved per the
+// EIP, so for any realistic code size (the EIP-170 24 KB contract limit is
+// far below 2^24 ≈ 16 MB) the high byte is always 0 and the two
+// encodings are bit-equivalent. This function preserves that existing
+// behavior byte-for-byte so callers can substitute it for the inlined
+// encoding in BinaryTrie.UpdateAccount without changing any state root.
+//
+// Any future correction of the byte offset is a consensus-level change
+// and must be coordinated across clients.
+func PackBasicData(nonce uint64, balance *uint256.Int, codeSize int) [HashSize]byte {
+ var data [HashSize]byte
+ binary.BigEndian.PutUint32(data[BasicDataCodeSizeOffset-1:], uint32(codeSize))
+ binary.BigEndian.PutUint64(data[BasicDataNonceOffset:], nonce)
+
+ // Balance is a 256-bit uint stored right-justified in the lower 16
+ // bytes of BasicData. For dev-mode accounts whose balance exceeds
+ // 2^128 - 1 (e.g. 0xff × HashSize), drop the leading 16 bytes to
+ // match the existing BinaryTrie behavior rather than panicking.
+ balanceBytes := balance.Bytes()
+ if len(balanceBytes) > 16 {
+ balanceBytes = balanceBytes[16:]
+ }
+ copy(data[HashSize-len(balanceBytes):], balanceBytes[:])
+ return data
+}
+
+// UnpackBasicData is the inverse of PackBasicData. It decodes the code
+// size, nonce, and balance fields from a BasicData leaf value.
+//
+// Note: the returned balance is always 128-bit or smaller because the
+// encoding reserves 16 bytes for it; dev-mode accounts whose pre-encoded
+// balance exceeded 2^128 - 1 are not recoverable losslessly.
+func UnpackBasicData(data [HashSize]byte) (nonce uint64, balance *uint256.Int, codeSize int) {
+ codeSize = int(binary.BigEndian.Uint32(data[BasicDataCodeSizeOffset-1:]))
+ nonce = binary.BigEndian.Uint64(data[BasicDataNonceOffset:])
+
+ var b [16]byte
+ copy(b[:], data[BasicDataBalanceOffset:])
+ balance = new(uint256.Int).SetBytes(b[:])
+ return
+}
diff --git a/trie/bintrie/trie.go b/trie/bintrie/trie.go
index 14c1a46c2b..727ffea389 100644
--- a/trie/bintrie/trie.go
+++ b/trie/bintrie/trie.go
@@ -242,29 +242,21 @@ func (t *BinaryTrie) GetStorage(addr common.Address, key []byte) ([]byte, error)
}
// UpdateAccount updates the account information for the given address.
+//
+// The BasicData encoding (nonce, balance, code size packed into 32 bytes)
+// is delegated to PackBasicData so that callers outside the trie layer —
+// notably the flat-state codec that writes stem blobs to pathdb — can
+// produce a bit-identical value without duplicating the layout logic.
func (t *BinaryTrie) UpdateAccount(addr common.Address, acc *types.StateAccount, codeLen int) error {
var (
- err error
- basicData [HashSize]byte
- values = make([][]byte, StemNodeWidth)
- stem = GetBinaryTreeKey(addr, zero[:])
+ values = make([][]byte, StemNodeWidth)
+ stem = GetBinaryTreeKey(addr, zero[:])
)
- binary.BigEndian.PutUint32(basicData[BasicDataCodeSizeOffset-1:], uint32(codeLen))
- binary.BigEndian.PutUint64(basicData[BasicDataNonceOffset:], acc.Nonce)
-
- // Because the balance is a max of 16 bytes, truncate
- // the extra values. This happens in devmode, where
- // 0xff**HashSize is allocated to the developer account.
- balanceBytes := acc.Balance.Bytes()
- // TODO: reduce the size of the allocation in devmode, then panic instead
- // of truncating.
- if len(balanceBytes) > 16 {
- balanceBytes = balanceBytes[16:]
- }
- copy(basicData[HashSize-len(balanceBytes):], balanceBytes[:])
+ basicData := PackBasicData(acc.Nonce, acc.Balance, codeLen)
values[BasicDataLeafKey] = basicData[:]
values[CodeHashLeafKey] = acc.CodeHash[:]
+ var err error
t.root, err = t.root.InsertValuesAtStem(stem, values, t.nodeResolver, 0)
return err
}
diff --git a/triedb/pathdb/flat_codec_bintrie.go b/triedb/pathdb/flat_codec_bintrie.go
new file mode 100644
index 0000000000..d4d2bb565e
--- /dev/null
+++ b/triedb/pathdb/flat_codec_bintrie.go
@@ -0,0 +1,390 @@
+// Copyright 2026 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package pathdb
+
+import (
+ "bytes"
+ "fmt"
+
+ "github.com/ethereum/go-ethereum/common"
+ "github.com/ethereum/go-ethereum/core/rawdb"
+ "github.com/ethereum/go-ethereum/ethdb"
+ "github.com/ethereum/go-ethereum/trie/bintrie"
+)
+
+// bintrieFlatCodec implements flatStateCodec for the binary trie using the
+// stem-blob on-disk layout defined in stem_blob.go. Keys are the 32-byte
+// stems of the EIP-7864 binary state tree (the first 31 bytes of the full
+// bintrie key, zero-padded into a common.Hash) and values are packed stem
+// blobs containing the subset of 256 offsets that have been written at
+// that stem.
+//
+// Unlike merkleFlatCodec (which is a stateless singleton), this codec
+// holds a reference to the underlying key-value store so its Write/Delete
+// methods can perform a read-modify-write on the existing stem blob
+// before merging in the new (offset, value) pair. ethdb.Batch is
+// write-only, so the batch passed to Write* cannot be used to fetch the
+// current state of a stem.
+//
+// Pre-aggregation requirement: within a single flush pass, the caller
+// must NOT issue two Write* calls targeting the same stem. The codec
+// reads the stem from the store (not from the in-flight batch), so a
+// second write at the same stem would re-read the pre-flush state and
+// clobber the first write. The codec's public surface area is designed
+// around this assumption; Commit 8 of the bintrie flat-state plan
+// restructures writeStates to pre-aggregate per-stem writes so callers
+// do not have to handle this manually.
+//
+// This codec is NOT wired into pathdb.Database.New yet — that happens in a
+// later commit once the leaf-production hook in binaryHasher and the
+// stateUpdate wiring are in place. Until then, all call sites still
+// dispatch through merkleFlatCodec and bintrie mode continues to use the
+// (soon to be replaced) keccak-shaped flat-state layout.
+type bintrieFlatCodec struct {
+ // db is the underlying key-value store used by applyWrites to read
+ // the current stem blob before merging in new (offset, value) pairs.
+ // It is always the pathdb Database's already-wrapped diskdb (the
+ // VerklePrefix-namespaced table) so reads and writes share the same
+ // on-disk key space.
+ db ethdb.KeyValueReader
+}
+
+// newBintrieFlatCodec constructs a bintrieFlatCodec bound to the given
+// key-value reader. The reader is used for read-modify-write on stem
+// blobs; writes still flow through the ethdb.Batch passed to each
+// Write*/Delete* call.
+func newBintrieFlatCodec(db ethdb.KeyValueReader) *bintrieFlatCodec {
+ return &bintrieFlatCodec{db: db}
+}
+
+// Compile-time interface assertion.
+var _ flatStateCodec = (*bintrieFlatCodec)(nil)
+
+// bintrieCacheKeyPrefix is a one-byte prefix applied to all bintrie cache
+// keys to keep them disjoint from merkle account keys (which are raw
+// 32-byte hashes) and merkle storage keys (which are 64-byte
+// accountHash||storageHash) in the shared clean-state fastcache. Without a
+// prefix, a 32-byte merkle account hash and a 32-byte bintrie stem could
+// collide on the same cache slot and return wrong data on read.
+const bintrieCacheKeyPrefix byte = 0x01
+
+// stemFromKey extracts the 31-byte stem from a 32-byte flat-state key.
+// Bintrie keys follow the "stem || offset" layout (EIP-7864), so the stem
+// is always bytes [0..30] and the byte at index 31 is the offset within
+// the stem. Callers that use AccountKey()/StorageKey() followed by
+// Read/Write never need to look at the offset themselves — the codec
+// handles offset extraction internally.
+func stemFromKey(key common.Hash) []byte {
+ return key[:bintrie.StemSize]
+}
+
+// offsetFromKey returns the offset byte of a 32-byte flat-state key.
+func offsetFromKey(key common.Hash) byte {
+ return key[bintrie.StemSize]
+}
+
+// ---------------------------------------------------------------------
+// Key derivation
+// ---------------------------------------------------------------------
+
+// AccountKey returns the bintrie BasicData key for the given address.
+// The result has the account's 31-byte stem in bytes [0..30] and offset 0
+// (BasicDataLeafKey) in byte 31. The CodeHash leaf lives at the same stem
+// with offset 1, so a single ReadAccount is enough to materialize both
+// offsets via the returned stem blob.
+func (c *bintrieFlatCodec) AccountKey(addr common.Address) common.Hash {
+ return common.BytesToHash(bintrie.GetBinaryTreeKeyBasicData(addr))
+}
+
+// StorageKey returns the bintrie key for a storage slot. The first return
+// value (the "account key" in the merkle naming convention) is the zero
+// hash because bintrie has no per-account grouping at the flat-state
+// level; the second return value is the full 32-byte slot key (stem ||
+// offset). Callers must pass both values back through the Read/Write
+// storage methods so the codec can recover the stem and offset.
+func (c *bintrieFlatCodec) StorageKey(addr common.Address, slot common.Hash) (common.Hash, common.Hash) {
+ full := bintrie.GetBinaryTreeKeyStorageSlot(addr, slot[:])
+ return common.Hash{}, common.BytesToHash(full)
+}
+
+// ---------------------------------------------------------------------
+// Disk reads
+// ---------------------------------------------------------------------
+
+// ReadAccount returns the raw stem blob for the account's stem — NOT a
+// decoded account. The caller (e.g. bintrieFlatReader in a later commit)
+// is responsible for extracting BasicData (offset 0) and CodeHash
+// (offset 1) from the blob.
+//
+// This signature asymmetry with merkleFlatCodec.ReadAccount (which
+// returns slim-RLP-encoded account bytes) is intentional: a bintrie stem
+// blob can contain data for many logical fields, and the caller decides
+// which offsets to extract. A higher-level "return an assembled Account"
+// helper would have to re-encode into a format no consumer wants.
+func (c *bintrieFlatCodec) ReadAccount(db ethdb.KeyValueReader, key common.Hash) []byte {
+ return rawdb.ReadBinTrieStem(db, stemFromKey(key))
+}
+
+// ReadStorage returns the 32-byte value stored at the storage slot's
+// offset within its stem, or nil if the offset is not populated.
+//
+// Unlike ReadAccount, this method DOES perform offset extraction from
+// the stem blob: storage-slot reads are always a single-offset query, so
+// returning the whole blob would just force every caller to re-run the
+// extraction. A malformed stem blob is treated as absent (nil is
+// returned, nothing is logged) to match rawdb.ReadStorageSnapshot on
+// the merkle path.
+//
+// The first parameter (accountKey) is ignored: see StorageKey for the
+// reasoning behind the bintrie's zero-hash convention.
+func (c *bintrieFlatCodec) ReadStorage(db ethdb.KeyValueReader, _ common.Hash, storageKey common.Hash) []byte {
+ blob := rawdb.ReadBinTrieStem(db, stemFromKey(storageKey))
+ if len(blob) == 0 {
+ return nil
+ }
+ val, err := extractStemOffset(blob, offsetFromKey(storageKey))
+ if err != nil {
+ // A well-formed blob never errors on a point read. If we get
+ // here the on-disk layout is corrupted — return nil rather than
+ // propagating the error, since the interface has no error path
+ // (the caller expects a value-or-nil just like
+ // rawdb.ReadStorageSnapshot).
+ return nil
+ }
+ return val
+}
+
+// ---------------------------------------------------------------------
+// Disk writes
+// ---------------------------------------------------------------------
+
+// WriteAccount writes an account entry. The blob is expected to be a
+// two-slot payload containing BasicData (bytes 0..31) followed by the
+// code hash (bytes 32..63) — the caller (binaryHasher, in a later
+// commit) packs these together because they live at the same stem and
+// benefit from a single read-modify-write pass.
+//
+// Writing nil or an empty blob is equivalent to clearing offsets 0 and 1
+// at this stem (a partial account deletion); the codec merges the
+// resulting bitmap into the existing stem blob and deletes the key
+// entirely if no offsets remain set.
+//
+// A wrong-length blob or an error from mergeStemBlob (e.g. malformed
+// existing blob) panics via crit because flat-state corruption is
+// unrecoverable at this layer — cf. rawdb.WriteAccountSnapshot.
+func (c *bintrieFlatCodec) WriteAccount(batch ethdb.Batch, key common.Hash, blob []byte) {
+ writes, err := splitAccountBlob(blob)
+ if err != nil {
+ crit("bintrie WriteAccount: %v", err)
+ return
+ }
+ c.applyWrites(batch, stemFromKey(key), writes)
+}
+
+// DeleteAccount clears offsets 0 (BasicData) and 1 (CodeHash) at the
+// account's stem. Other offsets at the same stem (e.g. header storage
+// slots) are NOT touched — callers that want a full account wipe must
+// walk storage separately, which is consistent with the bintrie's
+// DeleteAccount semantics (see trie/bintrie/trie.go).
+func (c *bintrieFlatCodec) DeleteAccount(batch ethdb.Batch, key common.Hash) {
+ writes := []stemOffsetValue{
+ {Offset: bintrie.BasicDataLeafKey, Value: nil},
+ {Offset: bintrie.CodeHashLeafKey, Value: nil},
+ }
+ c.applyWrites(batch, stemFromKey(key), writes)
+}
+
+// WriteStorage writes a single storage-slot value. The blob must be 32
+// bytes (the canonical storage value width); a shorter/longer blob is a
+// caller bug and panics via crit.
+//
+// The first parameter (accountKey) is ignored — see StorageKey.
+func (c *bintrieFlatCodec) WriteStorage(batch ethdb.Batch, _ common.Hash, storageKey common.Hash, blob []byte) {
+ if len(blob) != stemBlobValueSize {
+ crit("bintrie WriteStorage: value has len %d, want %d", len(blob), stemBlobValueSize)
+ return
+ }
+ writes := []stemOffsetValue{{Offset: offsetFromKey(storageKey), Value: blob}}
+ c.applyWrites(batch, stemFromKey(storageKey), writes)
+}
+
+// DeleteStorage clears a single offset at a stem. If the stem has no
+// other populated offsets afterwards, the key is removed entirely.
+func (c *bintrieFlatCodec) DeleteStorage(batch ethdb.Batch, _ common.Hash, storageKey common.Hash) {
+ writes := []stemOffsetValue{{Offset: offsetFromKey(storageKey), Value: nil}}
+ c.applyWrites(batch, stemFromKey(storageKey), writes)
+}
+
+// applyWrites performs a read-modify-write on the given stem: reads the
+// existing blob via the codec's bound reader, merges in the supplied
+// (offset, value) pairs, and writes the result back via the batch — or
+// deletes the key if the merged result is empty. Shared by all four
+// Write/Delete methods to ensure the policy (nil value clears, empty
+// blob deletes) is consistent.
+//
+// Important: the read comes from c.db, NOT from the batch. A second
+// call for the same stem within a flush would re-read the pre-flush
+// state; see the pre-aggregation requirement documented on
+// bintrieFlatCodec.
+func (c *bintrieFlatCodec) applyWrites(batch ethdb.Batch, stem []byte, writes []stemOffsetValue) {
+ existing := rawdb.ReadBinTrieStem(c.db, stem)
+ merged, err := mergeStemBlob(existing, writes)
+ if err != nil {
+ crit("bintrie applyWrites: %v", err)
+ return
+ }
+ if merged == nil {
+ rawdb.DeleteBinTrieStem(batch, stem)
+ return
+ }
+ rawdb.WriteBinTrieStem(batch, stem, merged)
+}
+
+// splitAccountBlob validates and splits the two-slot account payload
+// passed to WriteAccount. A nil or empty blob is interpreted as
+// "clear both offsets".
+func splitAccountBlob(blob []byte) ([]stemOffsetValue, error) {
+ if len(blob) == 0 {
+ return []stemOffsetValue{
+ {Offset: bintrie.BasicDataLeafKey, Value: nil},
+ {Offset: bintrie.CodeHashLeafKey, Value: nil},
+ }, nil
+ }
+ if len(blob) != 2*stemBlobValueSize {
+ return nil, fmt.Errorf("account blob len %d, want %d (BasicData || CodeHash)", len(blob), 2*stemBlobValueSize)
+ }
+ return []stemOffsetValue{
+ {Offset: bintrie.BasicDataLeafKey, Value: blob[:stemBlobValueSize]},
+ {Offset: bintrie.CodeHashLeafKey, Value: blob[stemBlobValueSize:]},
+ }, nil
+}
+
+// ---------------------------------------------------------------------
+// Clean-cache keys
+// ---------------------------------------------------------------------
+
+// AccountCacheKey returns a disambiguated byte key for the shared
+// fastcache-backed clean state cache. The prefix byte
+// bintrieCacheKeyPrefix keeps bintrie stem lookups disjoint from merkle
+// account lookups (both of which use 32-byte keys), and from merkle
+// storage lookups (which use 64-byte keys). The stem (31 bytes) is
+// embedded after the prefix; the offset byte is not included because
+// the cache entry caches the whole stem blob, not a single offset.
+func (c *bintrieFlatCodec) AccountCacheKey(key common.Hash) []byte {
+ out := make([]byte, 1+bintrie.StemSize)
+ out[0] = bintrieCacheKeyPrefix
+ copy(out[1:], stemFromKey(key))
+ return out
+}
+
+// StorageCacheKey returns the cache key for a storage entry. For bintrie
+// this is the same stem as the account cache key — storage slots and
+// account header live at different stems in the general case, but
+// multiple storage slots of the same stem share a single cache entry.
+// The accountKey parameter is ignored (see StorageKey).
+func (c *bintrieFlatCodec) StorageCacheKey(_ common.Hash, storageKey common.Hash) []byte {
+ out := make([]byte, 1+bintrie.StemSize)
+ out[0] = bintrieCacheKeyPrefix
+ copy(out[1:], stemFromKey(storageKey))
+ return out
+}
+
+// ---------------------------------------------------------------------
+// Generator iterator configuration
+// ---------------------------------------------------------------------
+
+// AccountPrefix returns the rawdb key prefix used for bintrie flat-state
+// entries. The generator iterator uses this prefix to walk all stem
+// blobs for the initial population of the flat state from an existing
+// bintrie.
+func (c *bintrieFlatCodec) AccountPrefix() []byte {
+ return rawdb.BinTrieStemPrefix
+}
+
+// StoragePrefix returns the same prefix as AccountPrefix because bintrie
+// flat-state entries are stored in a single namespace (stems contain
+// both account and storage data). The generator in a later commit uses
+// a single iterator over this prefix rather than the two-tier
+// account-then-storage walk used by the merkle generator.
+func (c *bintrieFlatCodec) StoragePrefix() []byte {
+ return rawdb.BinTrieStemPrefix
+}
+
+// AccountKeyLength returns the expected on-disk key length for a stem
+// entry: 1 byte of prefix + 31 bytes of stem = 32 bytes total.
+func (c *bintrieFlatCodec) AccountKeyLength() int {
+ return len(rawdb.BinTrieStemPrefix) + bintrie.StemSize
+}
+
+// StorageKeyLength returns the same length as AccountKeyLength because
+// bintrie stems are a single unified namespace.
+func (c *bintrieFlatCodec) StorageKeyLength() int {
+ return len(rawdb.BinTrieStemPrefix) + bintrie.StemSize
+}
+
+// AccountPrefixSize returns the per-entry on-disk overhead used by the
+// stateSet to estimate flush sizes. For bintrie this is just the single
+// byte of BinTrieStemPrefix.
+func (c *bintrieFlatCodec) AccountPrefixSize() int {
+ return len(rawdb.BinTrieStemPrefix)
+}
+
+// StoragePrefixSize returns the same as AccountPrefixSize.
+func (c *bintrieFlatCodec) StoragePrefixSize() int {
+ return len(rawdb.BinTrieStemPrefix)
+}
+
+// ---------------------------------------------------------------------
+// Generation progress marker
+// ---------------------------------------------------------------------
+
+// SplitMarker splits a generation progress marker into the account and
+// full components. For bintrie the marker is a single 31-byte stem (or
+// the full 32-byte key with offset 0), not the merkle two-tier
+// account-then-storage format, so both returned slices point at the
+// same data. The second half of the merkle marker (storage offset) has
+// no equivalent for bintrie: the generator iterates stems directly,
+// not (account, storage) pairs.
+func (c *bintrieFlatCodec) SplitMarker(marker []byte) ([]byte, []byte) {
+ if len(marker) == 0 {
+ return nil, marker
+ }
+ return marker, marker
+}
+
+// MarkerCompare compares a flat-state key against a progress marker with
+// bytes.Compare semantics, mirroring the merkle codec. The bintrie keys
+// being compared are stem bytes (31 bytes) or full keys (32 bytes); both
+// are lexicographically ordered so bytes.Compare is the correct
+// ordering.
+func (c *bintrieFlatCodec) MarkerCompare(key []byte, marker []byte) int {
+ return bytes.Compare(key, marker)
+}
+
+// crit reports an unrecoverable codec error by panicking with the
+// formatted message. Despite the name it does not call log.Crit, so
+// callers and tests observe an ordinary Go panic.
+func crit(format string, args ...any) {
+ // This deliberately panics instead of calling log.Crit: this file
+ // does not import the log package, and the merkle codec (see
+ // flat_codec.go) only reaches log.Crit indirectly, through rawdb's
+ // own accessors.
+ // Production flat-state corruption is unrecoverable, and panicking
+ // surfaces the issue immediately rather than letting a
+ // silently-corrupted state root propagate.
+ panic(fmt.Sprintf(format, args...))
+}
diff --git a/triedb/pathdb/flat_codec_bintrie_test.go b/triedb/pathdb/flat_codec_bintrie_test.go
new file mode 100644
index 0000000000..a55f211547
--- /dev/null
+++ b/triedb/pathdb/flat_codec_bintrie_test.go
@@ -0,0 +1,267 @@
+// Copyright 2026 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package pathdb
+
+import (
+ "bytes"
+ "testing"
+
+ "github.com/ethereum/go-ethereum/common"
+ "github.com/ethereum/go-ethereum/core/rawdb"
+ "github.com/ethereum/go-ethereum/ethdb"
+ "github.com/ethereum/go-ethereum/trie/bintrie"
+)
+
+// newTestBintrieCodec constructs a bintrieFlatCodec backed by an
+// in-memory key-value store. Returns both the codec and the underlying
+// store so tests can drive it directly.
+func newTestBintrieCodec(t *testing.T) (*bintrieFlatCodec, ethdb.Database) {
+ t.Helper()
+ db := rawdb.NewMemoryDatabase()
+ codec := newBintrieFlatCodec(db)
+ return codec, db
+}
+
+// flushBatch commits a batch built against a memory database. Called
+// after each codec write because the in-memory RMW of applyWrites reads
+// from the store, not the batch.
+func flushBatch(t *testing.T, batch interface{ Write() error }) {
+ t.Helper()
+ if err := batch.Write(); err != nil {
+ t.Fatalf("batch write: %v", err)
+ }
+}
+
+// TestBintrieCodecAccountRoundTrip verifies that an account written via
+// WriteAccount (a two-slot BasicData||CodeHash blob) is persisted under
+// the account's stem and can be read back by extracting the relevant
+// offsets from the stem blob.
+func TestBintrieCodecAccountRoundTrip(t *testing.T) {
+ codec, db := newTestBintrieCodec(t)
+ addr := common.HexToAddress("0x1111111111111111111111111111111111111111")
+
+ basicData := bytes.Repeat([]byte{0xAB}, stemBlobValueSize)
+ codeHash := bytes.Repeat([]byte{0xCD}, stemBlobValueSize)
+ blob := append(append([]byte{}, basicData...), codeHash...)
+
+ batch := db.NewBatch()
+ codec.WriteAccount(batch, codec.AccountKey(addr), blob)
+ flushBatch(t, batch)
+
+ // Read back via ReadAccount — returns the raw stem blob, not the
+ // decoded account. Extract offsets 0 and 1 manually.
+ got := codec.ReadAccount(db, codec.AccountKey(addr))
+ if len(got) == 0 {
+ t.Fatal("ReadAccount returned empty for just-written account")
+ }
+ gotBasic, err := extractStemOffset(got, bintrie.BasicDataLeafKey)
+ if err != nil || !bytes.Equal(gotBasic, basicData) {
+ t.Fatalf("BasicData extract: got %x err=%v, want %x", gotBasic, err, basicData)
+ }
+ gotCode, err := extractStemOffset(got, bintrie.CodeHashLeafKey)
+ if err != nil || !bytes.Equal(gotCode, codeHash) {
+ t.Fatalf("CodeHash extract: got %x err=%v, want %x", gotCode, err, codeHash)
+ }
+}
+
+// TestBintrieCodecStorageRoundTrip verifies that a storage slot written
+// via WriteStorage is persisted at the correct stem+offset and can be
+// read back via ReadStorage (which does offset extraction internally).
+func TestBintrieCodecStorageRoundTrip(t *testing.T) {
+ codec, db := newTestBintrieCodec(t)
+ addr := common.HexToAddress("0x2222222222222222222222222222222222222222")
+ slot := common.HexToHash("0x0000000000000000000000000000000000000000000000000000000000000042")
+ value := bytes.Repeat([]byte{0x77}, stemBlobValueSize)
+
+ acctKey, storageKey := codec.StorageKey(addr, slot)
+ batch := db.NewBatch()
+ codec.WriteStorage(batch, acctKey, storageKey, value)
+ flushBatch(t, batch)
+
+ got := codec.ReadStorage(db, acctKey, storageKey)
+ if !bytes.Equal(got, value) {
+ t.Fatalf("ReadStorage: got %x, want %x", got, value)
+ }
+}
+
+// TestBintrieCodecMultipleWritesSameStem verifies that two successive
+// writes to DIFFERENT offsets at the same stem both persist — this is
+// the common case when an account is updated (BasicData + CodeHash at
+// stem X) and then a header storage slot at the same stem is written.
+//
+// Note: because the codec reads RMW from the store (not the batch), the
+// caller must flush the batch between writes to the same stem for this
+// to work correctly. This test exercises that pattern to ensure the
+// per-call contract holds.
+func TestBintrieCodecMultipleWritesSameStem(t *testing.T) {
+ codec, db := newTestBintrieCodec(t)
+ addr := common.HexToAddress("0x3333333333333333333333333333333333333333")
+
+ // Write the account (offsets 0 and 1 at the BasicData stem).
+ basicData := bytes.Repeat([]byte{0xAA}, stemBlobValueSize)
+ codeHash := bytes.Repeat([]byte{0xBB}, stemBlobValueSize)
+ blob := append(append([]byte{}, basicData...), codeHash...)
+ batch := db.NewBatch()
+ codec.WriteAccount(batch, codec.AccountKey(addr), blob)
+ flushBatch(t, batch)
+
+ // Now write a header storage slot. Slot 0 (per EIP-7864) lives at
+ // offset 64 within the SAME stem as BasicData, so this is a
+ // read-modify-write on the existing stem blob.
+ slot := common.HexToHash("0x0000000000000000000000000000000000000000000000000000000000000000")
+ storageValue := bytes.Repeat([]byte{0xCC}, stemBlobValueSize)
+ acctKey, storageKey := codec.StorageKey(addr, slot)
+ batch = db.NewBatch()
+ codec.WriteStorage(batch, acctKey, storageKey, storageValue)
+ flushBatch(t, batch)
+
+ // All three offsets should now be readable.
+ accountBlob := codec.ReadAccount(db, codec.AccountKey(addr))
+ gotBasic, _ := extractStemOffset(accountBlob, bintrie.BasicDataLeafKey)
+ if !bytes.Equal(gotBasic, basicData) {
+ t.Fatalf("BasicData lost after storage write: got %x, want %x", gotBasic, basicData)
+ }
+ gotCode, _ := extractStemOffset(accountBlob, bintrie.CodeHashLeafKey)
+ if !bytes.Equal(gotCode, codeHash) {
+ t.Fatalf("CodeHash lost after storage write: got %x, want %x", gotCode, codeHash)
+ }
+ gotStorage := codec.ReadStorage(db, acctKey, storageKey)
+ if !bytes.Equal(gotStorage, storageValue) {
+ t.Fatalf("Storage: got %x, want %x", gotStorage, storageValue)
+ }
+}
+
+// TestBintrieCodecDeleteAccount verifies that DeleteAccount clears only
+// offsets 0 (BasicData) and 1 (CodeHash) at the account's stem, leaving
+// any other offsets (e.g. header storage slots) at the same stem
+// untouched. This mirrors BinaryTrie.DeleteAccount's intended semantics.
+func TestBintrieCodecDeleteAccount(t *testing.T) {
+ codec, db := newTestBintrieCodec(t)
+ addr := common.HexToAddress("0x4444444444444444444444444444444444444444")
+
+ // Populate account (offsets 0+1) and one header storage slot (offset 64).
+ basicData := bytes.Repeat([]byte{0xAA}, stemBlobValueSize)
+ codeHash := bytes.Repeat([]byte{0xBB}, stemBlobValueSize)
+ batch := db.NewBatch()
+ codec.WriteAccount(batch, codec.AccountKey(addr), append(basicData, codeHash...))
+ flushBatch(t, batch)
+
+ storageValue := bytes.Repeat([]byte{0xCC}, stemBlobValueSize)
+ slot := common.HexToHash("0x0000000000000000000000000000000000000000000000000000000000000000")
+ acctKey, storageKey := codec.StorageKey(addr, slot)
+ batch = db.NewBatch()
+ codec.WriteStorage(batch, acctKey, storageKey, storageValue)
+ flushBatch(t, batch)
+
+ // Delete the account. Offsets 0 and 1 should be cleared; the
+ // header storage slot at offset 64 should survive.
+ batch = db.NewBatch()
+ codec.DeleteAccount(batch, codec.AccountKey(addr))
+ flushBatch(t, batch)
+
+ accountBlob := codec.ReadAccount(db, codec.AccountKey(addr))
+ if len(accountBlob) == 0 {
+ t.Fatal("stem blob was fully deleted; header storage should still be present")
+ }
+ if got, _ := extractStemOffset(accountBlob, bintrie.BasicDataLeafKey); got != nil {
+ t.Fatalf("BasicData not cleared: %x", got)
+ }
+ if got, _ := extractStemOffset(accountBlob, bintrie.CodeHashLeafKey); got != nil {
+ t.Fatalf("CodeHash not cleared: %x", got)
+ }
+ if got := codec.ReadStorage(db, acctKey, storageKey); !bytes.Equal(got, storageValue) {
+ t.Fatalf("header storage lost after DeleteAccount: got %x, want %x", got, storageValue)
+ }
+}
+
+// TestBintrieCodecDeleteLastOffsetRemovesKey verifies that when the
+// final populated offset at a stem is cleared, the on-disk key is
+// removed entirely (zero-length blobs are never persisted).
+func TestBintrieCodecDeleteLastOffsetRemovesKey(t *testing.T) {
+ codec, db := newTestBintrieCodec(t)
+ addr := common.HexToAddress("0x5555555555555555555555555555555555555555")
+ slot := common.HexToHash("0x0000000000000000000000000000000000000000000000000000000000000080")
+ value := bytes.Repeat([]byte{0xDD}, stemBlobValueSize)
+
+ acctKey, storageKey := codec.StorageKey(addr, slot)
+
+ // Write, verify, delete, verify absent.
+ batch := db.NewBatch()
+ codec.WriteStorage(batch, acctKey, storageKey, value)
+ flushBatch(t, batch)
+
+ if got := codec.ReadStorage(db, acctKey, storageKey); !bytes.Equal(got, value) {
+ t.Fatalf("pre-delete read: got %x, want %x", got, value)
+ }
+
+ batch = db.NewBatch()
+ codec.DeleteStorage(batch, acctKey, storageKey)
+ flushBatch(t, batch)
+
+ // The raw key should be gone from the store.
+ raw := rawdb.ReadBinTrieStem(db, stemFromKey(storageKey))
+ if raw != nil {
+ t.Fatalf("stem blob should be deleted, got %x", raw)
+ }
+ // And ReadStorage returns nil.
+ if got := codec.ReadStorage(db, acctKey, storageKey); got != nil {
+ t.Fatalf("post-delete read: got %x, want nil", got)
+ }
+}
+
+// TestBintrieCodecCacheKeysDisjoint verifies that the bintrie cache key
+// prefix keeps it disjoint from merkle account keys. This is the
+// collision check that Agent 2 flagged in the review.
+func TestBintrieCodecCacheKeysDisjoint(t *testing.T) {
+ codec := &bintrieFlatCodec{}
+ merkle := &merkleFlatCodec{}
+
+ // A 32-byte hash that, when passed to both codecs, would collide
+ // if the bintrie codec didn't prefix-disambiguate its cache keys.
+ hash := common.HexToHash("0xaabbccddeeff00112233445566778899aabbccddeeff00112233445566778899")
+
+ binKey := codec.AccountCacheKey(hash)
+ merkleKey := merkle.AccountCacheKey(hash)
+
+ if bytes.Equal(binKey, merkleKey) {
+ t.Fatalf("bintrie and merkle cache keys collided: both are %x", binKey)
+ }
+ if binKey[0] != bintrieCacheKeyPrefix {
+ t.Fatalf("bintrie cache key missing prefix byte: %x", binKey)
+ }
+}
+
+// TestBintrieCodecSplitMarker verifies the single-tier marker handling.
+// For merkle the marker is a two-tier (account, account+storage) pair;
+// for bintrie it's a single 32-byte stem key, so SplitMarker returns
+// the same slice twice.
+func TestBintrieCodecSplitMarker(t *testing.T) {
+ codec := &bintrieFlatCodec{}
+
+ // Nil marker.
+ acc, full := codec.SplitMarker(nil)
+ if acc != nil || full != nil {
+ t.Fatalf("nil marker: acc=%v full=%v, want nil/nil", acc, full)
+ }
+
+ // A 32-byte marker. Both halves point to the same bytes.
+ marker := bytes.Repeat([]byte{0xAA}, 32)
+ acc, full = codec.SplitMarker(marker)
+ if !bytes.Equal(acc, marker) || !bytes.Equal(full, marker) {
+ t.Fatalf("SplitMarker: acc=%x full=%x, want both %x", acc, full, marker)
+ }
+}
diff --git a/triedb/pathdb/stem_blob.go b/triedb/pathdb/stem_blob.go
new file mode 100644
index 0000000000..90aaa04c8f
--- /dev/null
+++ b/triedb/pathdb/stem_blob.go
@@ -0,0 +1,327 @@
+// Copyright 2026 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package pathdb
+
+import (
+ "errors"
+ "fmt"
+ "math/bits"
+
+ "github.com/ethereum/go-ethereum/common"
+)
+
+// Bintrie stem blob layout
+// ------------------------
+//
+// The flat-state representation of a bintrie stem packs the populated
+// (offset, 32-byte value) pairs at that stem into a single on-disk blob.
+// A stem holds up to 256 offsets (per EIP-7864, the full "stem group"),
+// but in practice only a handful are populated for any given account
+// (BasicData at offset 0, CodeHash at offset 1, a few storage slots, or
+// code chunks). A dense encoding would waste 8 KB per stem; this layout
+// scales linearly with the number of populated offsets.
+//
+// Layout:
+//
+// [ 0 .. 31 ] 32-byte bitmap; bit i set iff offset i has a value
+// [32 .. 63 ] first populated offset's 32-byte value
+// [64 .. 95 ] second populated offset's 32-byte value
+// ...
+// [32 + 32*(N-1) .. 32 + 32*N - 1] N-th populated offset's value
+//
+// where N = popcount(bitmap). Values appear in increasing offset order,
+// i.e. the order in which the bitmap is scanned: bytes in ascending
+// index order (byte 0 first, then byte 1, etc.), and within each byte
+// from MSB (offset b*8) to LSB (offset b*8+7).
+//
+// An "absent" offset is one whose bitmap bit is clear; an offset whose
+// value is 32 zero bytes is "present with zero value" — that is the
+// tombstone convention used by BinaryTrie.DeleteStorage, which writes
+// 32 zero bytes to mark a slot as cleared without removing it from the
+// underlying StemNode's Values slice.
+//
+// An empty stem (all bits clear) is represented by a zero-length blob,
+// and callers must delete the on-disk key rather than write a zero-length
+// value.
+const (
+ stemBlobBitmapSize = 32 // length of the leading bitmap in bytes; one bit per stem offset
+ stemBlobBitmapBits = stemBlobBitmapSize * 8 // 256 addressable offsets per stem
+ stemBlobValueSize = common.HashLength // 32 bytes per stored value
+)
+
+// stemOffsetMax is the highest offset the stem blob bitmap can address.
+const stemOffsetMax = stemBlobBitmapBits - 1 // 255
+
+var (
+ errStemBlobTooShort = errors.New("stem blob shorter than bitmap") // non-empty blob with fewer than stemBlobBitmapSize bytes
+ errStemBlobMalformed = errors.New("stem blob length does not match bitmap popcount") // length/popcount mismatch, or a value that is not 32 bytes long
+ errStemBlobValueOutOfRange = errors.New("stem blob value slice out of range") // extractStemOffset: the value slot extends past the end of the blob
+)
+
+// encodeStemBlob serialises a bitmap together with its dense value list
+// into the stem blob wire format: the 32-byte bitmap followed by the
+// values, which must be supplied in ascending offset order.
+//
+// An error wrapping errStemBlobMalformed is returned when len(values)
+// differs from popcount(bitmap) or when any value is not 32 bytes long.
+// A bitmap with no bits set yields (nil, nil): delete the key instead.
+func encodeStemBlob(bitmap [stemBlobBitmapSize]byte, values [][]byte) ([]byte, error) {
+	n := bitmapPopcount(bitmap)
+	switch {
+	case n != len(values):
+		return nil, fmt.Errorf("stem blob popcount=%d values=%d: %w", n, len(values), errStemBlobMalformed)
+	case n == 0:
+		return nil, nil
+	}
+	blob := make([]byte, stemBlobBitmapSize, stemBlobBitmapSize+n*stemBlobValueSize)
+	copy(blob, bitmap[:])
+	for idx, val := range values {
+		if len(val) != stemBlobValueSize {
+			return nil, fmt.Errorf("stem blob value %d has len %d: %w", idx, len(val), errStemBlobMalformed)
+		}
+		blob = append(blob, val...)
+	}
+	return blob, nil
+}
+
+// decodeStemBlob splits a raw stem blob into its 32-byte bitmap and the
+// list of populated values in ascending offset order. The values are
+// sub-slices of blob, so copy them before retaining or mutating them.
+//
+// Nil and zero-length blobs are valid and decode to an all-zero bitmap
+// with no values, meaning that no offset is present.
+func decodeStemBlob(blob []byte) ([stemBlobBitmapSize]byte, [][]byte, error) {
+	var bitmap [stemBlobBitmapSize]byte
+	switch size := len(blob); {
+	case size == 0:
+		return bitmap, nil, nil
+	case size < stemBlobBitmapSize:
+		return bitmap, nil, errStemBlobTooShort
+	}
+	copy(bitmap[:], blob)
+	payload := blob[stemBlobBitmapSize:]
+	count := bitmapPopcount(bitmap)
+	if want := stemBlobBitmapSize + count*stemBlobValueSize; len(blob) != want {
+		return bitmap, nil, fmt.Errorf("stem blob len=%d popcount=%d expected=%d: %w", len(blob), count, want, errStemBlobMalformed)
+	}
+	if count == 0 {
+		return bitmap, nil, nil
+	}
+	// Slice the values out of the payload without copying them.
+	values := make([][]byte, 0, count)
+	for ; len(payload) > 0; payload = payload[stemBlobValueSize:] {
+		values = append(values, payload[:stemBlobValueSize])
+	}
+	return bitmap, values, nil
+}
+
+// extractStemOffset looks up a single offset in a stem blob and returns
+// its 32-byte value as a sub-slice of blob, which must not be mutated.
+// The lookup reads only the bitmap and the requested value slot.
+//
+// A missing offset, or an empty blob, yields (nil, nil). An error means
+// the blob is too short to hold the bitmap or the indexed value.
+func extractStemOffset(blob []byte, offset byte) ([]byte, error) {
+	switch {
+	case len(blob) == 0:
+		return nil, nil
+	case len(blob) < stemBlobBitmapSize:
+		return nil, errStemBlobTooShort
+	}
+	var bitmap [stemBlobBitmapSize]byte
+	copy(bitmap[:], blob)
+
+	// An unset bit means the offset holds no value in this blob.
+	if present := bitmapGet(bitmap, offset); !present {
+		return nil, nil
+	}
+	// The value's slot index equals the number of populated offsets
+	// that sort before it.
+	lo := stemBlobBitmapSize + bitmapRank(bitmap, offset)*stemBlobValueSize
+	hi := lo + stemBlobValueSize
+	if hi > len(blob) {
+		return nil, errStemBlobValueOutOfRange
+	}
+	return blob[lo:hi], nil
+}
+
+// stemBuilder accumulates (offset, value) pairs and produces a stem blob.
+// It supports loading an existing blob, setting individual offsets, and
+// emitting the final encoded form.
+//
+// Setting a value of nil or an empty slice clears the corresponding bit
+// from the bitmap (the offset becomes "absent"). Setting a non-nil
+// 32-byte slice — including 32 zero bytes — marks the offset present
+// with that value. This preserves the distinction between absent and
+// tombstoned-with-zero used elsewhere in the bintrie code.
+//
+// A stemBuilder is not safe for concurrent use.
+type stemBuilder struct {
+ bitmap [stemBlobBitmapSize]byte // set and loadFromBlob keep a bit set iff values[offset] is non-nil
+ // values stores the builder-owned copy of the value at each offset,
+ // or nil if absent. Using a fixed 256-entry array avoids allocation
+ // churn as offsets are set and cleared.
+ values [stemBlobBitmapBits][]byte
+}
+
+// newStemBuilder allocates a stemBuilder with no offsets populated.
+func newStemBuilder() *stemBuilder {
+	return new(stemBuilder)
+}
+
+// loadFromBlob merges the entries of the given stem blob into the builder.
+// Existing entries at the same offsets are overwritten. An empty blob is
+// a no-op.
+func (b *stemBuilder) loadFromBlob(blob []byte) error {
+ if len(blob) == 0 {
+ return nil
+ }
+ bitmap, values, err := decodeStemBlob(blob)
+ if err != nil {
+ return err
+ }
+ // Walk the bitmap and copy each populated offset into the builder,
+ // stepping the values index in sync.
+ var vi int
+ for offset := range stemBlobBitmapBits {
+ if !bitmapGet(bitmap, byte(offset)) {
+ continue
+ }
+ // decodeStemBlob returns slices aliasing the input blob; we take
+ // an owning copy so the builder survives the caller mutating or
+ // releasing the source blob.
+ v := make([]byte, stemBlobValueSize)
+ copy(v, values[vi])
+ b.values[offset] = v
+ b.bitmap[offset/8] |= 1 << (7 - uint(offset%8))
+ vi++
+ }
+ return nil
+}
+
+// set stores value at offset. A nil or zero-length value removes the
+// offset (its bitmap bit is cleared); a 32-byte value, all-zero included,
+// marks the offset present with a copy of that value. Any other length
+// panics, since callers are expected to pass 32-byte values only.
+func (b *stemBuilder) set(offset byte, value []byte) {
+	// Bit for this offset within its bitmap byte (MSB = lowest offset).
+	mask := byte(0x80) >> (offset % 8)
+	switch len(value) {
+	case 0:
+		b.values[offset] = nil
+		b.bitmap[offset/8] &^= mask
+	case stemBlobValueSize:
+		// Keep a private copy so later writes to value cannot reach us.
+		b.values[offset] = append([]byte(nil), value...)
+		b.bitmap[offset/8] |= mask
+	default:
+		panic(fmt.Sprintf("stemBuilder: value at offset %d has len %d, want %d", offset, len(value), stemBlobValueSize))
+	}
+}
+
+// empty reports whether the builder currently holds no populated offsets.
+func (b *stemBuilder) empty() bool {
+	return b.bitmap == [stemBlobBitmapSize]byte{}
+}
+
+// encode serialises the builder's current contents as a stem blob: the
+// bitmap followed by the populated values in ascending offset order. An
+// empty builder encodes to nil, telling the caller to delete the key.
+func (b *stemBuilder) encode() []byte {
+	populated := bitmapPopcount(b.bitmap)
+	if populated == 0 {
+		return nil
+	}
+	size := stemBlobBitmapSize + populated*stemBlobValueSize
+	blob := make([]byte, stemBlobBitmapSize, size)
+	copy(blob, b.bitmap[:])
+
+	// Append the stored values in ascending offset order. Absent
+	// offsets hold nil and contribute nothing, so exactly `populated`
+	// values follow the bitmap.
+	for i := range b.values {
+		if val := b.values[i]; val != nil {
+			blob = append(blob, val...)
+		}
+	}
+	return blob
+}
+
+// reset returns the builder to its empty state, dropping every entry.
+func (b *stemBuilder) reset() {
+	clear(b.bitmap[:])
+	clear(b.values[:])
+}
+
+// stemOffsetValue is a single (offset, value) pair passed to mergeStemBlob.
+// A nil or zero-length Value clears the offset.
+type stemOffsetValue struct {
+ Offset byte // position within the stem that the write targets
+ Value []byte // new value; mergeStemBlob hands it to stemBuilder.set
+}
+
+// mergeStemBlob decodes existing (if any), applies writes in order, and
+// returns the re-encoded blob, or (nil, nil) when nothing remains so the
+// caller can delete the on-disk key. A malformed blob, or a write whose
+// Value is neither empty nor 32 bytes long, is returned as an error.
+func mergeStemBlob(existing []byte, writes []stemOffsetValue) ([]byte, error) {
+	b := newStemBuilder()
+	if err := b.loadFromBlob(existing); err != nil {
+		return nil, err
+	}
+	for i, w := range writes {
+		// stemBuilder.set panics on any other length; report it instead.
+		if n := len(w.Value); n != 0 && n != stemBlobValueSize {
+			return nil, fmt.Errorf("stem blob write %d at offset %d has len %d: %w", i, w.Offset, n, errStemBlobMalformed)
+		}
+		b.set(w.Offset, w.Value)
+	}
+	return b.encode(), nil
+}
+
+// bitmapPopcount returns the number of set bits in the 32-byte bitmap.
+func bitmapPopcount(bitmap [stemBlobBitmapSize]byte) int {
+ var n int
+ for _, b := range bitmap {
+ n += bits.OnesCount8(b)
+ }
+ return n
+}
+
+// bitmapGet reports whether the bit for `offset` is set. Offsets map to
+// bits MSB-first: offset o lives in byte o/8, and within that byte the
+// most significant bit holds the lowest offset (o%8 == 0), the least
+// significant bit the highest (o%8 == 7).
+func bitmapGet(bitmap [stemBlobBitmapSize]byte, offset byte) bool {
+	return bitmap[offset>>3]&(0x80>>(offset&7)) != 0
+}
+
+// bitmapRank returns the number of set bits that come strictly before
+// `offset` (in ascending offset order). The offset itself does not count.
+func bitmapRank(bitmap [stemBlobBitmapSize]byte, offset byte) int {
+ // Full whole bytes before the target.
+ byteIdx := int(offset) / 8
+ var rank int
+ for i := range byteIdx {
+ rank += bits.OnesCount8(bitmap[i])
+ }
+ // Bits within the target byte that are above the target's bit.
+ bitIdx := offset % 8
+ if bitIdx > 0 {
+ // The MSB is offset%8==0. We want bits 0..bitIdx-1 in that layout,
+ // which are the top bitIdx bits of the byte.
+ mask := byte(0xFF << (8 - bitIdx))
+ rank += bits.OnesCount8(bitmap[byteIdx] & mask)
+ }
+ return rank
+}
diff --git a/triedb/pathdb/stem_blob_test.go b/triedb/pathdb/stem_blob_test.go
new file mode 100644
index 0000000000..da57cf144f
--- /dev/null
+++ b/triedb/pathdb/stem_blob_test.go
@@ -0,0 +1,361 @@
+// Copyright 2026 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package pathdb
+
+import (
+ "bytes"
+ "testing"
+)
+
+// mkval constructs a 32-byte value where the first byte is tag and the
+// rest are zero. Used to make test assertions easy to read.
+func mkval(tag byte) []byte {
+ v := make([]byte, stemBlobValueSize)
+ v[0] = tag
+ return v
+}
+
+// TestStemBlobEmpty checks both directions of the empty case: a builder
+// without entries encodes to nil (so callers delete the key), and a nil
+// or zero-length blob decodes to an all-zero bitmap with no values.
+func TestStemBlobEmpty(t *testing.T) {
+	builder := newStemBuilder()
+	if !builder.empty() {
+		t.Fatal("fresh builder should be empty")
+	}
+	if blob := builder.encode(); blob != nil {
+		t.Fatalf("empty builder should encode to nil, got %x", blob)
+	}
+
+	// A nil blob and a zero-length blob must both decode to "nothing".
+	inputs := [][]byte{nil, {}}
+	for _, input := range inputs {
+		bitmap, values, err := decodeStemBlob(input)
+		switch {
+		case err != nil:
+			t.Fatalf("decode empty: %v", err)
+		case values != nil:
+			t.Fatalf("decode empty values: got %v, want nil", values)
+		}
+		for idx := range bitmap {
+			if bitmap[idx] != 0 {
+				t.Fatalf("decode empty bitmap byte %d: got 0x%02x, want 0", idx, bitmap[idx])
+			}
+		}
+	}
+}
+
+// TestStemBlobBasicDataAndCodeHash covers the "account header" shape of a
+// stem: only offsets 0 (BasicData) and 1 (CodeHash) are populated. The
+// blob is checked via a full decode and via single-offset point reads.
+func TestStemBlobBasicDataAndCodeHash(t *testing.T) {
+	builder := newStemBuilder()
+	basicData := mkval(0xAA)
+	codeHash := mkval(0xBB)
+	builder.set(0, basicData)
+	builder.set(1, codeHash)
+
+	if builder.empty() {
+		t.Fatal("builder should not be empty after two sets")
+	}
+
+	blob := builder.encode()
+	if blob == nil {
+		t.Fatal("encode should not return nil for populated builder")
+	}
+	if got, want := len(blob), stemBlobBitmapSize+2*stemBlobValueSize; got != want {
+		t.Fatalf("blob length: got %d, want %d", got, want)
+	}
+
+	// Full decode: both bits set, both values intact and in order.
+	bitmap, values, err := decodeStemBlob(blob)
+	if err != nil {
+		t.Fatalf("decode: %v", err)
+	}
+	if got := bitmapPopcount(bitmap); got != 2 {
+		t.Fatalf("popcount: got %d, want 2", got)
+	}
+	if !(bitmapGet(bitmap, 0) && bitmapGet(bitmap, 1)) {
+		t.Fatalf("bitmap missing offset 0 or 1: %x", bitmap)
+	}
+	if first := values[0]; !bytes.Equal(first, basicData) {
+		t.Fatalf("value[0]: got %x, want %x", first, basicData)
+	}
+	if second := values[1]; !bytes.Equal(second, codeHash) {
+		t.Fatalf("value[1]: got %x, want %x", second, codeHash)
+	}
+
+	// Single-offset reads must agree with the decoded values.
+	val, err := extractStemOffset(blob, 0)
+	switch {
+	case err != nil:
+		t.Fatalf("extract offset 0: %v", err)
+	case !bytes.Equal(val, basicData):
+		t.Fatalf("extract 0: got %x, want %x", val, basicData)
+	}
+	val, err = extractStemOffset(blob, 1)
+	switch {
+	case err != nil:
+		t.Fatalf("extract offset 1: %v", err)
+	case !bytes.Equal(val, codeHash):
+		t.Fatalf("extract 1: got %x, want %x", val, codeHash)
+	}
+	// Offset 42 was never written, so it must read as (nil, nil).
+	val, err = extractStemOffset(blob, 42)
+	switch {
+	case err != nil:
+		t.Fatalf("extract unset offset: %v", err)
+	case val != nil:
+		t.Fatalf("extract unset: got %x, want nil", val)
+	}
+}
+
+// TestStemBlobAllOffsets verifies the worst-case stem: all 256 offsets are
+// populated, and every one must read back as exactly the value written.
+func TestStemBlobAllOffsets(t *testing.T) {
+	b := newStemBuilder()
+	for i := range stemBlobBitmapBits {
+		b.set(byte(i), mkval(byte(i)))
+	}
+	blob := b.encode()
+	expectedLen := stemBlobBitmapSize + stemBlobBitmapBits*stemBlobValueSize
+	if len(blob) != expectedLen {
+		t.Fatalf("blob length: got %d, want %d", len(blob), expectedLen)
+	}
+
+	bitmap, _, err := decodeStemBlob(blob)
+	if err != nil {
+		t.Fatalf("decode: %v", err)
+	}
+	if bitmapPopcount(bitmap) != stemBlobBitmapBits {
+		t.Fatalf("popcount: got %d, want %d", bitmapPopcount(bitmap), stemBlobBitmapBits)
+	}
+	for i := range stemBlobBitmapBits {
+		got, err := extractStemOffset(blob, byte(i))
+		if err != nil {
+			t.Fatalf("extract %d: %v", i, err)
+		}
+		if want := mkval(byte(i)); !bytes.Equal(got, want) { // full compare; also rejects a nil result
+			t.Fatalf("extract %d: got %x, want %x", i, got, want)
+		}
+	}
+}
+
+// TestStemBlobSparseHighOffsets verifies that non-contiguous offsets
+// (typical for storage slots scattered across the stem) round-trip
+// correctly.
+func TestStemBlobSparseHighOffsets(t *testing.T) {
+	b := newStemBuilder()
+	offsets := []byte{3, 17, 64, 127, 128, 200, 255}
+	for _, o := range offsets {
+		b.set(o, mkval(o))
+	}
+	blob := b.encode()
+	if len(blob) != stemBlobBitmapSize+len(offsets)*stemBlobValueSize {
+		t.Fatalf("unexpected blob length: %d", len(blob))
+	}
+
+	// Every populated offset must read back as the exact value written.
+	for _, o := range offsets {
+		got, err := extractStemOffset(blob, o)
+		if err != nil {
+			t.Fatalf("extract %d: %v", o, err)
+		}
+		if want := mkval(o); !bytes.Equal(got, want) { // full compare; also rejects a nil result
+			t.Fatalf("extract %d: got %x, want %x", o, got, want)
+		}
+	}
+	// Spot-check absent offsets between populated ones.
+	for _, o := range []byte{0, 1, 2, 4, 18, 63, 126, 129, 199, 254} {
+		got, err := extractStemOffset(blob, o)
+		if err != nil {
+			t.Fatalf("extract absent %d: %v", o, err)
+		}
+		if got != nil {
+			t.Fatalf("extract absent %d: got %x, want nil", o, got)
+		}
+	}
+}
+
+// TestStemBlobSetClearRoundtrip verifies that setting and then clearing
+// an offset leaves the builder in the same state as never setting it.
+func TestStemBlobSetClearRoundtrip(t *testing.T) {
+ b := newStemBuilder()
+ b.set(5, mkval(0xCD))
+ if b.empty() {
+ t.Fatal("should not be empty after set")
+ }
+ b.set(5, nil)
+ if !b.empty() {
+ t.Fatal("should be empty after clearing the only entry")
+ }
+ if blob := b.encode(); blob != nil {
+ t.Fatalf("encode after clear: got %x, want nil", blob)
+ }
+}
+
+// TestStemBlobLoadFromBlob exercises read-modify-write through a builder:
+// an encoded blob is loaded, then entries are replaced, cleared and added.
+func TestStemBlobLoadFromBlob(t *testing.T) {
+	// Seed blob: offsets 0 and 64 populated.
+	seed := newStemBuilder()
+	seed.set(0, mkval(0x11))
+	seed.set(64, mkval(0x22))
+	initial := seed.encode()
+
+	// Load the seed into a second builder and apply three edits.
+	builder := newStemBuilder()
+	if err := builder.loadFromBlob(initial); err != nil {
+		t.Fatalf("loadFromBlob: %v", err)
+	}
+	builder.set(0, mkval(0x33))   // overwrite offset 0
+	builder.set(64, nil)          // clear offset 64
+	builder.set(128, mkval(0x44)) // add offset 128
+	updated := builder.encode()
+
+	// Offset 0 now carries the replacement value.
+	got, err := extractStemOffset(updated, 0)
+	if !(err == nil && got != nil && got[0] == 0x33) {
+		t.Fatalf("offset 0 after update: got %x err=%v, want tag 0x33", got, err)
+	}
+	// Offset 64 was cleared and must read as absent.
+	got, err = extractStemOffset(updated, 64)
+	switch {
+	case err != nil:
+		t.Fatalf("offset 64 after clear: %v", err)
+	case got != nil:
+		t.Fatalf("offset 64 after clear: got %x, want nil", got)
+	}
+	// Offset 128 did not exist in the seed and must now be present.
+	got, err = extractStemOffset(updated, 128)
+	if !(err == nil && got != nil && got[0] == 0x44) {
+		t.Fatalf("offset 128 after update: got %x err=%v, want tag 0x44", got, err)
+	}
+}
+
+// TestStemBlobMergeHelper verifies mergeStemBlob: read existing, apply
+// writes, produce new blob in one call.
+func TestStemBlobMergeHelper(t *testing.T) {
+	// Start with a blob containing offset 0.
+	b := newStemBuilder()
+	b.set(0, mkval(0x01))
+	initial := b.encode()
+
+	// Merge: overwrite 0, add 1, clear a non-existent offset (no-op).
+	result, err := mergeStemBlob(initial, []stemOffsetValue{
+		{Offset: 0, Value: mkval(0x02)},
+		{Offset: 1, Value: mkval(0x03)},
+		{Offset: 100, Value: nil},
+	})
+	if err != nil {
+		t.Fatalf("merge: %v", err)
+	}
+	got, err := extractStemOffset(result, 0)
+	if err != nil || got == nil || got[0] != 0x02 {
+		t.Fatalf("merged offset 0: got %x err=%v, want tag 0x02", got, err)
+	}
+	got, err = extractStemOffset(result, 1)
+	if err != nil || got == nil || got[0] != 0x03 {
+		t.Fatalf("merged offset 1: got %x err=%v, want tag 0x03", got, err)
+	}
+}
+
+// TestStemBlobMergeToEmpty checks that a merge which clears every
+// populated offset returns a nil blob, i.e. "delete the key".
+func TestStemBlobMergeToEmpty(t *testing.T) {
+	seed := newStemBuilder()
+	seed.set(0, mkval(0x01))
+	seed.set(5, mkval(0x02))
+	// A nil Value clears the offset; clear both populated ones.
+	clears := []stemOffsetValue{
+		{Offset: 0},
+		{Offset: 5},
+	}
+	merged, err := mergeStemBlob(seed.encode(), clears)
+	switch {
+	case err != nil:
+		t.Fatalf("merge to empty: %v", err)
+	case merged != nil:
+		t.Fatalf("merge to empty: got %x, want nil", merged)
+	}
+}
+
+// TestStemBlobTombstoneZeroBytes checks that a value of 32 zero bytes is
+// stored as "present with zero value" and is not mistaken for "absent".
+// Per the layout notes this is the convention DeleteStorage relies on.
+func TestStemBlobTombstoneZeroBytes(t *testing.T) {
+	zeros := make([]byte, stemBlobValueSize)
+	builder := newStemBuilder()
+	builder.set(64, zeros)
+	if builder.empty() {
+		t.Fatal("zero-value entry should count as populated")
+	}
+	// The all-zero value must survive encoding and read back verbatim.
+	got, err := extractStemOffset(builder.encode(), 64)
+	switch {
+	case err != nil:
+		t.Fatalf("extract tombstone: %v", err)
+	case !bytes.Equal(got, zeros):
+		t.Fatalf("extract tombstone: got %x, want 32 zero bytes", got)
+	}
+}
+
+// TestStemBlobMalformedInput verifies that decodeStemBlob detects
+// malformed blobs with wrong lengths.
+func TestStemBlobMalformedInput(t *testing.T) {
+ // Shorter than bitmap.
+ if _, _, err := decodeStemBlob(make([]byte, 10)); err == nil {
+ t.Fatal("expected error for too-short blob")
+ }
+ // Bitmap claims 2 entries but blob only has room for 1.
+ var bitmap [stemBlobBitmapSize]byte
+ bitmap[0] = 0xC0 // bits 0 and 1 set → 2 entries
+ short := make([]byte, stemBlobBitmapSize+stemBlobValueSize)
+ copy(short, bitmap[:])
+ if _, _, err := decodeStemBlob(short); err == nil {
+ t.Fatal("expected error for blob shorter than bitmap implies")
+ }
+}
+
+// TestBitmapRank checks bitmapRank, the helper extractStemOffset uses to
+// turn an offset into a value index, for both set and unset offsets.
+func TestBitmapRank(t *testing.T) {
+	// Populate offsets 0, 1, 5, 64 and 200 (MSB-first within each byte).
+	var bitmap [stemBlobBitmapSize]byte
+	for _, set := range [...]byte{0, 1, 5, 64, 200} {
+		bitmap[set>>3] |= 0x80 >> (set & 7)
+	}
+	type rankCase struct {
+		offset byte
+		want   int
+	}
+	for _, tc := range []rankCase{
+		{0, 0},   // first set bit is at index 0
+		{1, 1},   // second set bit
+		{5, 2},   // third
+		{64, 3},  // fourth
+		{200, 4}, // fifth
+		// For an unset offset, rank is the number of set bits below it.
+		{2, 2},   // bits 0 and 1 are before 2
+		{100, 4}, // bits 0,1,5,64 are before 100
+		{255, 5}, // all five bits are before 255
+	} {
+		if got := bitmapRank(bitmap, tc.offset); got != tc.want {
+			t.Errorf("bitmapRank(%d) = %d, want %d", tc.offset, got, tc.want)
+		}
+	}
+}