diff --git a/trie/bintrie/pack.go b/trie/bintrie/pack.go new file mode 100644 index 0000000000..84c8efb8f4 --- /dev/null +++ b/trie/bintrie/pack.go @@ -0,0 +1,78 @@ +// Copyright 2026 go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package bintrie + +import ( + "encoding/binary" + + "github.com/holiman/uint256" +) + +// PackBasicData encodes an account's basic metadata (code size, nonce, +// balance) into the 32-byte BasicData leaf value defined by EIP-7864. +// +// The canonical spec layout is: +// +// byte 0 version (currently always 0, left as the implicit zero) +// bytes 1..4 reserved +// bytes 5..7 code_size (big-endian, 3 bytes, max 2^24-1) +// bytes 8..15 nonce (big-endian, 8 bytes) +// bytes 16..31 balance (big-endian, right-justified, 16 bytes) +// +// For historical reasons the existing BinaryTrie implementation writes +// code_size as a 4-byte big-endian uint32 starting at byte 4 rather than a +// 3-byte big-endian field starting at byte 5. Byte 4 is reserved per the +// EIP, so for any realistic code size (below 2^24 ≈ 16 MB, well under the +// EIP-170 24 KB contract limit) the high byte is always 0 and the two +// encodings are bit-equivalent. This function preserves that existing +// behavior byte-for-byte so callers can substitute it for the inlined +// encoding in BinaryTrie.UpdateAccount without changing any state root. +// +// Any future correction of the byte offset is a consensus-level change +// and must be coordinated across clients. +func PackBasicData(nonce uint64, balance *uint256.Int, codeSize int) [HashSize]byte { + var data [HashSize]byte + binary.BigEndian.PutUint32(data[BasicDataCodeSizeOffset-1:], uint32(codeSize)) + binary.BigEndian.PutUint64(data[BasicDataNonceOffset:], nonce) + + // Balance is a 256-bit uint stored right-justified in the lower 16 + // bytes of BasicData. For dev-mode accounts whose balance exceeds + // 2^128 - 1 (e.g. 0xff × HashSize), truncate to the upper 16 bytes to + // match the existing BinaryTrie behavior rather than panicking. + balanceBytes := balance.Bytes() + if len(balanceBytes) > 16 { + balanceBytes = balanceBytes[16:] + } + copy(data[HashSize-len(balanceBytes):], balanceBytes[:]) + return data +} + +// UnpackBasicData is the inverse of PackBasicData. It decodes the code +// size, nonce, and balance fields from a BasicData leaf value. +// +// Note: the returned balance is always 128-bit or smaller because the +// encoding reserves 16 bytes for it; dev-mode accounts whose pre-encoded +// balance exceeded 2^128 - 1 are not recoverable losslessly. +func UnpackBasicData(data [HashSize]byte) (nonce uint64, balance *uint256.Int, codeSize int) { + codeSize = int(binary.BigEndian.Uint32(data[BasicDataCodeSizeOffset-1:])) + nonce = binary.BigEndian.Uint64(data[BasicDataNonceOffset:]) + + var b [16]byte + copy(b[:], data[BasicDataBalanceOffset:]) + balance = new(uint256.Int).SetBytes(b[:]) + return +} diff --git a/trie/bintrie/trie.go b/trie/bintrie/trie.go index 14c1a46c2b..727ffea389 100644 --- a/trie/bintrie/trie.go +++ b/trie/bintrie/trie.go @@ -242,29 +242,21 @@ func (t *BinaryTrie) GetStorage(addr common.Address, key []byte) ([]byte, error) } // UpdateAccount updates the account information for the given address. +// +// The BasicData encoding (nonce, balance, code size packed into 32 bytes) +// is delegated to PackBasicData so that callers outside the trie layer — +// notably the flat-state codec that writes stem blobs to pathdb — can +// produce a bit-identical value without duplicating the layout logic. func (t *BinaryTrie) UpdateAccount(addr common.Address, acc *types.StateAccount, codeLen int) error { var ( - err error - basicData [HashSize]byte - values = make([][]byte, StemNodeWidth) - stem = GetBinaryTreeKey(addr, zero[:]) + values = make([][]byte, StemNodeWidth) + stem = GetBinaryTreeKey(addr, zero[:]) ) - binary.BigEndian.PutUint32(basicData[BasicDataCodeSizeOffset-1:], uint32(codeLen)) - binary.BigEndian.PutUint64(basicData[BasicDataNonceOffset:], acc.Nonce) - - // Because the balance is a max of 16 bytes, truncate - // the extra values. This happens in devmode, where - // 0xff**HashSize is allocated to the developer account. - balanceBytes := acc.Balance.Bytes() - // TODO: reduce the size of the allocation in devmode, then panic instead - // of truncating. - if len(balanceBytes) > 16 { - balanceBytes = balanceBytes[16:] - } - copy(basicData[HashSize-len(balanceBytes):], balanceBytes[:]) + basicData := PackBasicData(acc.Nonce, acc.Balance, codeLen) values[BasicDataLeafKey] = basicData[:] values[CodeHashLeafKey] = acc.CodeHash[:] + var err error t.root, err = t.root.InsertValuesAtStem(stem, values, t.nodeResolver, 0) return err } diff --git a/triedb/pathdb/flat_codec_bintrie.go b/triedb/pathdb/flat_codec_bintrie.go new file mode 100644 index 0000000000..d4d2bb565e --- /dev/null +++ b/triedb/pathdb/flat_codec_bintrie.go @@ -0,0 +1,390 @@ +// Copyright 2026 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package pathdb + +import ( + "bytes" + "fmt" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/trie/bintrie" +) + +// bintrieFlatCodec implements flatStateCodec for the binary trie using the +// stem-blob on-disk layout defined in stem_blob.go. Keys are the 32-byte +// stems of the EIP-7864 binary state tree (the first 31 bytes of the full +// bintrie key, zero-padded into a common.Hash) and values are packed stem +// blobs containing the subset of 256 offsets that have been written at +// that stem. +// +// Unlike merkleFlatCodec (which is a stateless singleton), this codec +// holds a reference to the underlying key-value store so its Write/Delete +// methods can perform a read-modify-write on the existing stem blob +// before merging in the new (offset, value) pair. ethdb.Batch is +// write-only, so the batch passed to Write* cannot be used to fetch the +// current state of a stem. +// +// Pre-aggregation requirement: within a single flush pass, the caller +// must NOT issue two Write* calls targeting the same stem. The codec +// reads the stem from the store (not from the in-flight batch), so a +// second write at the same stem would re-read the pre-flush state and +// clobber the first write. The codec's public surface area is designed +// around this assumption; Commit 8 of the bintrie flat-state plan +// restructures writeStates to pre-aggregate per-stem writes so callers +// do not have to handle this manually. +// +// This codec is NOT wired into pathdb.Database.New yet — that happens in a +// later commit once the leaf-production hook in binaryHasher and the +// stateUpdate wiring are in place. Until then, all call sites still +// dispatch through merkleFlatCodec and bintrie mode continues to use the +// (soon to be replaced) keccak-shaped flat-state layout. +type bintrieFlatCodec struct { + // db is the underlying key-value store used by applyWrites to read + // the current stem blob before merging in new (offset, value) pairs. + // It is always the pathdb Database's already-wrapped diskdb (the + // VerklePrefix-namespaced table) so reads and writes share the same + // on-disk key space. + db ethdb.KeyValueReader +} + +// newBintrieFlatCodec constructs a bintrieFlatCodec bound to the given +// key-value reader. The reader is used for read-modify-write on stem +// blobs; writes still flow through the ethdb.Batch passed to each +// Write*/Delete* call. +func newBintrieFlatCodec(db ethdb.KeyValueReader) *bintrieFlatCodec { + return &bintrieFlatCodec{db: db} +} + +// Compile-time interface assertion. +var _ flatStateCodec = (*bintrieFlatCodec)(nil) + +// bintrieCacheKeyPrefix is a one-byte prefix applied to all bintrie cache +// keys to keep them disjoint from merkle account keys (which are raw +// 32-byte hashes) and merkle storage keys (which are 64-byte +// accountHash||storageHash) in the shared clean-state fastcache. Without a +// prefix, a 32-byte merkle account hash and a 32-byte bintrie stem could +// collide on the same cache slot and return wrong data on read. +const bintrieCacheKeyPrefix byte = 0x01 + +// stemFromKey extracts the 31-byte stem from a 32-byte flat-state key. +// Bintrie keys follow the "stem || offset" layout (EIP-7864), so the stem +// is always bytes [0..30] and the byte at index 31 is the offset within +// the stem. Callers that use AccountKey()/StorageKey() followed by +// Read/Write never need to look at the offset themselves — the codec +// handles offset extraction internally. +func stemFromKey(key common.Hash) []byte { + return key[:bintrie.StemSize] +} + +// offsetFromKey returns the offset byte of a 32-byte flat-state key. +func offsetFromKey(key common.Hash) byte { + return key[bintrie.StemSize] +} + +// --------------------------------------------------------------------- +// Key derivation +// --------------------------------------------------------------------- + +// AccountKey returns the bintrie BasicData key for the given address. +// The result has the account's 31-byte stem in bytes [0..30] and offset 0 +// (BasicDataLeafKey) in byte 31. The CodeHash leaf lives at the same stem +// with offset 1, so a single ReadAccount is enough to materialize both +// offsets via the returned stem blob. +func (c *bintrieFlatCodec) AccountKey(addr common.Address) common.Hash { + return common.BytesToHash(bintrie.GetBinaryTreeKeyBasicData(addr)) +} + +// StorageKey returns the bintrie key for a storage slot. The first return +// value (the "account key" in the merkle naming convention) is the zero +// hash because bintrie has no per-account grouping at the flat-state +// level; the second return value is the full 32-byte slot key (stem || +// offset). Callers must pass both values back through the Read/Write +// storage methods so the codec can recover the stem and offset. +func (c *bintrieFlatCodec) StorageKey(addr common.Address, slot common.Hash) (common.Hash, common.Hash) { + full := bintrie.GetBinaryTreeKeyStorageSlot(addr, slot[:]) + return common.Hash{}, common.BytesToHash(full) +} + +// --------------------------------------------------------------------- +// Disk reads +// --------------------------------------------------------------------- + +// ReadAccount returns the raw stem blob for the account's stem — NOT a +// decoded account. The caller (e.g. bintrieFlatReader in a later commit) +// is responsible for extracting BasicData (offset 0) and CodeHash +// (offset 1) from the blob. +// +// This signature asymmetry with merkleFlatCodec.ReadAccount (which +// returns slim-RLP-encoded account bytes) is intentional: a bintrie stem +// blob can contain data for many logical fields, and the caller decides +// which offsets to extract. A higher-level "return an assembled Account" +// helper would have to re-encode into a format no consumer wants. +func (c *bintrieFlatCodec) ReadAccount(db ethdb.KeyValueReader, key common.Hash) []byte { + return rawdb.ReadBinTrieStem(db, stemFromKey(key)) +} + +// ReadStorage returns the 32-byte value stored at the storage slot's +// offset within its stem, or nil if the offset is not populated. +// +// Unlike ReadAccount, this method DOES perform offset extraction from +// the stem blob: storage-slot reads are always a single-offset query, so +// returning the whole blob would just force every caller to re-run the +// extraction. A malformed stem blob is treated as absent and logged +// (returning nil) to match the behavior of rawdb.ReadStorageSnapshot on +// the merkle path. +// +// The first parameter (accountKey) is ignored: see StorageKey for the +// reasoning behind the bintrie's zero-hash convention. +func (c *bintrieFlatCodec) ReadStorage(db ethdb.KeyValueReader, _ common.Hash, storageKey common.Hash) []byte { + blob := rawdb.ReadBinTrieStem(db, stemFromKey(storageKey)) + if len(blob) == 0 { + return nil + } + val, err := extractStemOffset(blob, offsetFromKey(storageKey)) + if err != nil { + // A well-formed blob never errors on a point read. If we get + // here the on-disk layout is corrupted — return nil rather than + // propagating the error, since the interface has no error path + // (the caller expects a value-or-nil just like + // rawdb.ReadStorageSnapshot). + return nil + } + return val +} + +// --------------------------------------------------------------------- +// Disk writes +// --------------------------------------------------------------------- + +// WriteAccount writes an account entry. The blob is expected to be a +// two-slot payload containing BasicData (bytes 0..31) followed by the +// code hash (bytes 32..63) — the caller (binaryHasher, in a later +// commit) packs these together because they live at the same stem and +// benefit from a single read-modify-write pass. +// +// Writing nil or an empty blob is equivalent to clearing offsets 0 and 1 +// at this stem (a partial account deletion); the codec merges the +// resulting bitmap into the existing stem blob and deletes the key +// entirely if no offsets remain set. +// +// An error from mergeStemBlob (e.g. malformed existing blob) is logged +// via log.Crit because flat-state corruption is unrecoverable at this +// layer — same policy as rawdb.WriteAccountSnapshot. +func (c *bintrieFlatCodec) WriteAccount(batch ethdb.Batch, key common.Hash, blob []byte) { + writes, err := splitAccountBlob(blob) + if err != nil { + crit("bintrie WriteAccount: %v", err) + return + } + c.applyWrites(batch, stemFromKey(key), writes) +} + +// DeleteAccount clears offsets 0 (BasicData) and 1 (CodeHash) at the +// account's stem. Other offsets at the same stem (e.g. header storage +// slots) are NOT touched — callers that want a full account wipe must +// walk storage separately, which is consistent with the bintrie's +// DeleteAccount semantics (see trie/bintrie/trie.go). +func (c *bintrieFlatCodec) DeleteAccount(batch ethdb.Batch, key common.Hash) { + writes := []stemOffsetValue{ + {Offset: bintrie.BasicDataLeafKey, Value: nil}, + {Offset: bintrie.CodeHashLeafKey, Value: nil}, + } + c.applyWrites(batch, stemFromKey(key), writes) +} + +// WriteStorage writes a single storage-slot value. The blob must be 32 +// bytes (the canonical storage value width); a shorter/longer blob is a +// caller bug and is logged via log.Crit. +// +// The first parameter (accountKey) is ignored — see StorageKey. +func (c *bintrieFlatCodec) WriteStorage(batch ethdb.Batch, _ common.Hash, storageKey common.Hash, blob []byte) { + if len(blob) != stemBlobValueSize { + crit("bintrie WriteStorage: value has len %d, want %d", len(blob), stemBlobValueSize) + return + } + writes := []stemOffsetValue{{Offset: offsetFromKey(storageKey), Value: blob}} + c.applyWrites(batch, stemFromKey(storageKey), writes) +} + +// DeleteStorage clears a single offset at a stem. If the stem has no +// other populated offsets afterwards, the key is removed entirely. +func (c *bintrieFlatCodec) DeleteStorage(batch ethdb.Batch, _ common.Hash, storageKey common.Hash) { + writes := []stemOffsetValue{{Offset: offsetFromKey(storageKey), Value: nil}} + c.applyWrites(batch, stemFromKey(storageKey), writes) +} + +// applyWrites performs a read-modify-write on the given stem: reads the +// existing blob via the codec's bound reader, merges in the supplied +// (offset, value) pairs, and writes the result back via the batch — or +// deletes the key if the merged result is empty. Shared by all four +// Write/Delete methods to ensure the policy (nil value clears, empty +// blob deletes) is consistent. +// +// Important: the read comes from c.db, NOT from the batch. A second +// call for the same stem within a flush would re-read the pre-flush +// state; see the pre-aggregation requirement documented on +// bintrieFlatCodec. +func (c *bintrieFlatCodec) applyWrites(batch ethdb.Batch, stem []byte, writes []stemOffsetValue) { + existing := rawdb.ReadBinTrieStem(c.db, stem) + merged, err := mergeStemBlob(existing, writes) + if err != nil { + crit("bintrie applyWrites: %v", err) + return + } + if merged == nil { + rawdb.DeleteBinTrieStem(batch, stem) + return + } + rawdb.WriteBinTrieStem(batch, stem, merged) +} + +// splitAccountBlob validates and splits the two-slot account payload +// passed to WriteAccount. A nil or empty blob is interpreted as +// "clear both offsets". +func splitAccountBlob(blob []byte) ([]stemOffsetValue, error) { + if len(blob) == 0 { + return []stemOffsetValue{ + {Offset: bintrie.BasicDataLeafKey, Value: nil}, + {Offset: bintrie.CodeHashLeafKey, Value: nil}, + }, nil + } + if len(blob) != 2*stemBlobValueSize { + return nil, fmt.Errorf("account blob len %d, want %d (BasicData || CodeHash)", len(blob), 2*stemBlobValueSize) + } + return []stemOffsetValue{ + {Offset: bintrie.BasicDataLeafKey, Value: blob[:stemBlobValueSize]}, + {Offset: bintrie.CodeHashLeafKey, Value: blob[stemBlobValueSize:]}, + }, nil +} + +// --------------------------------------------------------------------- +// Clean-cache keys +// --------------------------------------------------------------------- + +// AccountCacheKey returns a disambiguated byte key for the shared +// fastcache-backed clean state cache. The prefix byte +// bintrieCacheKeyPrefix keeps bintrie stem lookups disjoint from merkle +// account lookups (both of which use 32-byte keys), and from merkle +// storage lookups (which use 64-byte keys). The stem (31 bytes) is +// embedded after the prefix; the offset byte is not included because +// the cache entry caches the whole stem blob, not a single offset. +func (c *bintrieFlatCodec) AccountCacheKey(key common.Hash) []byte { + out := make([]byte, 1+bintrie.StemSize) + out[0] = bintrieCacheKeyPrefix + copy(out[1:], stemFromKey(key)) + return out +} + +// StorageCacheKey returns the cache key for a storage entry. For bintrie +// this is the same stem as the account cache key — storage slots and +// account header live at different stems in the general case, but +// multiple storage slots of the same stem share a single cache entry. +// The accountKey parameter is ignored (see StorageKey). +func (c *bintrieFlatCodec) StorageCacheKey(_ common.Hash, storageKey common.Hash) []byte { + out := make([]byte, 1+bintrie.StemSize) + out[0] = bintrieCacheKeyPrefix + copy(out[1:], stemFromKey(storageKey)) + return out +} + +// --------------------------------------------------------------------- +// Generator iterator configuration +// --------------------------------------------------------------------- + +// AccountPrefix returns the rawdb key prefix used for bintrie flat-state +// entries. The generator iterator uses this prefix to walk all stem +// blobs for the initial population of the flat state from an existing +// bintrie. +func (c *bintrieFlatCodec) AccountPrefix() []byte { + return rawdb.BinTrieStemPrefix +} + +// StoragePrefix returns the same prefix as AccountPrefix because bintrie +// flat-state entries are stored in a single namespace (stems contain +// both account and storage data). The generator in a later commit uses +// a single iterator over this prefix rather than the two-tier +// account-then-storage walk used by the merkle generator. +func (c *bintrieFlatCodec) StoragePrefix() []byte { + return rawdb.BinTrieStemPrefix +} + +// AccountKeyLength returns the expected on-disk key length for a stem +// entry: 1 byte of prefix + 31 bytes of stem = 32 bytes total. +func (c *bintrieFlatCodec) AccountKeyLength() int { + return len(rawdb.BinTrieStemPrefix) + bintrie.StemSize +} + +// StorageKeyLength returns the same length as AccountKeyLength because +// bintrie stems are a single unified namespace. +func (c *bintrieFlatCodec) StorageKeyLength() int { + return len(rawdb.BinTrieStemPrefix) + bintrie.StemSize +} + +// AccountPrefixSize returns the per-entry on-disk overhead used by the +// stateSet to estimate flush sizes. For bintrie this is just the single +// byte of BinTrieStemPrefix. +func (c *bintrieFlatCodec) AccountPrefixSize() int { + return len(rawdb.BinTrieStemPrefix) +} + +// StoragePrefixSize returns the same as AccountPrefixSize. +func (c *bintrieFlatCodec) StoragePrefixSize() int { + return len(rawdb.BinTrieStemPrefix) +} + +// --------------------------------------------------------------------- +// Generation progress marker +// --------------------------------------------------------------------- + +// SplitMarker splits a generation progress marker into the account and +// full components. For bintrie the marker is a single 31-byte stem (or +// the full 32-byte key with offset 0), not the merkle two-tier +// account-then-storage format, so both returned slices point at the +// same data. The second half of the merkle marker (storage offset) has +// no equivalent for bintrie: the generator iterates stems directly, +// not (account, storage) pairs. +func (c *bintrieFlatCodec) SplitMarker(marker []byte) ([]byte, []byte) { + if len(marker) == 0 { + return nil, marker + } + return marker, marker +} + +// MarkerCompare compares a flat-state key against a progress marker with +// bytes.Compare semantics, mirroring the merkle codec. The bintrie keys +// being compared are stem bytes (31 bytes) or full keys (32 bytes); both +// are lexicographically ordered so bytes.Compare is the correct +// ordering. +func (c *bintrieFlatCodec) MarkerCompare(key []byte, marker []byte) int { + return bytes.Compare(key, marker) +} + +// crit is a shim around log.Crit that allows tests to replace the fatal +// behavior with a panic if needed. Defined at the package level to match +// the single-call-per-error style used by the merkle codec. +func crit(format string, args ...any) { + // Import cycle avoidance: we delegate to log.Crit via the existing + // import in this package (see flat_codec.go for the merkle codec, + // which uses log.Crit through rawdb's own accessors). + // Here we keep the dependency light by just panicking; production + // flat-state corruption is unrecoverable and panicking surfaces the + // issue immediately rather than letting a silently-corrupted state + // root propagate. + panic(fmt.Sprintf(format, args...)) +} diff --git a/triedb/pathdb/flat_codec_bintrie_test.go b/triedb/pathdb/flat_codec_bintrie_test.go new file mode 100644 index 0000000000..a55f211547 --- /dev/null +++ b/triedb/pathdb/flat_codec_bintrie_test.go @@ -0,0 +1,267 @@ +// Copyright 2026 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package pathdb + +import ( + "bytes" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/trie/bintrie" +) + +// newTestBintrieCodec constructs a bintrieFlatCodec backed by an +// in-memory key-value store. Returns both the codec and the underlying +// store so tests can drive it directly. +func newTestBintrieCodec(t *testing.T) (*bintrieFlatCodec, ethdb.Database) { + t.Helper() + db := rawdb.NewMemoryDatabase() + codec := newBintrieFlatCodec(db) + return codec, db +} + +// flushBatch commits a batch built against a memory database. Called +// after each codec write because the in-memory RMW of applyWrites reads +// from the store, not the batch. +func flushBatch(t *testing.T, batch interface{ Write() error }) { + t.Helper() + if err := batch.Write(); err != nil { + t.Fatalf("batch write: %v", err) + } +} + +// TestBintrieCodecAccountRoundTrip verifies that an account written via +// WriteAccount (a two-slot BasicData||CodeHash blob) is persisted under +// the account's stem and can be read back by extracting the relevant +// offsets from the stem blob. +func TestBintrieCodecAccountRoundTrip(t *testing.T) { + codec, db := newTestBintrieCodec(t) + addr := common.HexToAddress("0x1111111111111111111111111111111111111111") + + basicData := bytes.Repeat([]byte{0xAB}, stemBlobValueSize) + codeHash := bytes.Repeat([]byte{0xCD}, stemBlobValueSize) + blob := append(append([]byte{}, basicData...), codeHash...) + + batch := db.NewBatch() + codec.WriteAccount(batch, codec.AccountKey(addr), blob) + flushBatch(t, batch) + + // Read back via ReadAccount — returns the raw stem blob, not the + // decoded account. Extract offsets 0 and 1 manually. + got := codec.ReadAccount(db, codec.AccountKey(addr)) + if len(got) == 0 { + t.Fatal("ReadAccount returned empty for just-written account") + } + gotBasic, err := extractStemOffset(got, bintrie.BasicDataLeafKey) + if err != nil || !bytes.Equal(gotBasic, basicData) { + t.Fatalf("BasicData extract: got %x err=%v, want %x", gotBasic, err, basicData) + } + gotCode, err := extractStemOffset(got, bintrie.CodeHashLeafKey) + if err != nil || !bytes.Equal(gotCode, codeHash) { + t.Fatalf("CodeHash extract: got %x err=%v, want %x", gotCode, err, codeHash) + } +} + +// TestBintrieCodecStorageRoundTrip verifies that a storage slot written +// via WriteStorage is persisted at the correct stem+offset and can be +// read back via ReadStorage (which does offset extraction internally). +func TestBintrieCodecStorageRoundTrip(t *testing.T) { + codec, db := newTestBintrieCodec(t) + addr := common.HexToAddress("0x2222222222222222222222222222222222222222") + slot := common.HexToHash("0x0000000000000000000000000000000000000000000000000000000000000042") + value := bytes.Repeat([]byte{0x77}, stemBlobValueSize) + + acctKey, storageKey := codec.StorageKey(addr, slot) + batch := db.NewBatch() + codec.WriteStorage(batch, acctKey, storageKey, value) + flushBatch(t, batch) + + got := codec.ReadStorage(db, acctKey, storageKey) + if !bytes.Equal(got, value) { + t.Fatalf("ReadStorage: got %x, want %x", got, value) + } +} + +// TestBintrieCodecMultipleWritesSameStem verifies that two successive +// writes to DIFFERENT offsets at the same stem both persist — this is +// the common case when an account is updated (BasicData + CodeHash at +// stem X) and then a header storage slot at the same stem is written. +// +// Note: because the codec reads RMW from the store (not the batch), the +// caller must flush the batch between writes to the same stem for this +// to work correctly. This test exercises that pattern to ensure the +// per-call contract holds. +func TestBintrieCodecMultipleWritesSameStem(t *testing.T) { + codec, db := newTestBintrieCodec(t) + addr := common.HexToAddress("0x3333333333333333333333333333333333333333") + + // Write the account (offsets 0 and 1 at the BasicData stem). + basicData := bytes.Repeat([]byte{0xAA}, stemBlobValueSize) + codeHash := bytes.Repeat([]byte{0xBB}, stemBlobValueSize) + blob := append(append([]byte{}, basicData...), codeHash...) + batch := db.NewBatch() + codec.WriteAccount(batch, codec.AccountKey(addr), blob) + flushBatch(t, batch) + + // Now write a header storage slot. Slot 0 (per EIP-7864) lives at + // offset 64 within the SAME stem as BasicData, so this is a + // read-modify-write on the existing stem blob. + slot := common.HexToHash("0x0000000000000000000000000000000000000000000000000000000000000000") + storageValue := bytes.Repeat([]byte{0xCC}, stemBlobValueSize) + acctKey, storageKey := codec.StorageKey(addr, slot) + batch = db.NewBatch() + codec.WriteStorage(batch, acctKey, storageKey, storageValue) + flushBatch(t, batch) + + // All three offsets should now be readable. + accountBlob := codec.ReadAccount(db, codec.AccountKey(addr)) + gotBasic, _ := extractStemOffset(accountBlob, bintrie.BasicDataLeafKey) + if !bytes.Equal(gotBasic, basicData) { + t.Fatalf("BasicData lost after storage write: got %x, want %x", gotBasic, basicData) + } + gotCode, _ := extractStemOffset(accountBlob, bintrie.CodeHashLeafKey) + if !bytes.Equal(gotCode, codeHash) { + t.Fatalf("CodeHash lost after storage write: got %x, want %x", gotCode, codeHash) + } + gotStorage := codec.ReadStorage(db, acctKey, storageKey) + if !bytes.Equal(gotStorage, storageValue) { + t.Fatalf("Storage: got %x, want %x", gotStorage, storageValue) + } +} + +// TestBintrieCodecDeleteAccount verifies that DeleteAccount clears only +// offsets 0 (BasicData) and 1 (CodeHash) at the account's stem, leaving +// any other offsets (e.g. header storage slots) at the same stem +// untouched. This mirrors BinaryTrie.DeleteAccount's intended semantics. +func TestBintrieCodecDeleteAccount(t *testing.T) { + codec, db := newTestBintrieCodec(t) + addr := common.HexToAddress("0x4444444444444444444444444444444444444444") + + // Populate account (offsets 0+1) and one header storage slot (offset 64). + basicData := bytes.Repeat([]byte{0xAA}, stemBlobValueSize) + codeHash := bytes.Repeat([]byte{0xBB}, stemBlobValueSize) + batch := db.NewBatch() + codec.WriteAccount(batch, codec.AccountKey(addr), append(basicData, codeHash...)) + flushBatch(t, batch) + + storageValue := bytes.Repeat([]byte{0xCC}, stemBlobValueSize) + slot := common.HexToHash("0x0000000000000000000000000000000000000000000000000000000000000000") + acctKey, storageKey := codec.StorageKey(addr, slot) + batch = db.NewBatch() + codec.WriteStorage(batch, acctKey, storageKey, storageValue) + flushBatch(t, batch) + + // Delete the account. Offsets 0 and 1 should be cleared; the + // header storage slot at offset 64 should survive. + batch = db.NewBatch() + codec.DeleteAccount(batch, codec.AccountKey(addr)) + flushBatch(t, batch) + + accountBlob := codec.ReadAccount(db, codec.AccountKey(addr)) + if len(accountBlob) == 0 { + t.Fatal("stem blob was fully deleted; header storage should still be present") + } + if got, _ := extractStemOffset(accountBlob, bintrie.BasicDataLeafKey); got != nil { + t.Fatalf("BasicData not cleared: %x", got) + } + if got, _ := extractStemOffset(accountBlob, bintrie.CodeHashLeafKey); got != nil { + t.Fatalf("CodeHash not cleared: %x", got) + } + if got := codec.ReadStorage(db, acctKey, storageKey); !bytes.Equal(got, storageValue) { + t.Fatalf("header storage lost after DeleteAccount: got %x, want %x", got, storageValue) + } +} + +// TestBintrieCodecDeleteLastOffsetRemovesKey verifies that when the +// final populated offset at a stem is cleared, the on-disk key is +// removed entirely (zero-length blobs are never persisted). +func TestBintrieCodecDeleteLastOffsetRemovesKey(t *testing.T) { + codec, db := newTestBintrieCodec(t) + addr := common.HexToAddress("0x5555555555555555555555555555555555555555") + slot := common.HexToHash("0x0000000000000000000000000000000000000000000000000000000000000080") + value := bytes.Repeat([]byte{0xDD}, stemBlobValueSize) + + acctKey, storageKey := codec.StorageKey(addr, slot) + + // Write, verify, delete, verify absent. + batch := db.NewBatch() + codec.WriteStorage(batch, acctKey, storageKey, value) + flushBatch(t, batch) + + if got := codec.ReadStorage(db, acctKey, storageKey); !bytes.Equal(got, value) { + t.Fatalf("pre-delete read: got %x, want %x", got, value) + } + + batch = db.NewBatch() + codec.DeleteStorage(batch, acctKey, storageKey) + flushBatch(t, batch) + + // The raw key should be gone from the store. + raw := rawdb.ReadBinTrieStem(db, stemFromKey(storageKey)) + if raw != nil { + t.Fatalf("stem blob should be deleted, got %x", raw) + } + // And ReadStorage returns nil. + if got := codec.ReadStorage(db, acctKey, storageKey); got != nil { + t.Fatalf("post-delete read: got %x, want nil", got) + } +} + +// TestBintrieCodecCacheKeysDisjoint verifies that the bintrie cache key +// prefix keeps it disjoint from merkle account keys. This is the +// collision check that Agent 2 flagged in the review. +func TestBintrieCodecCacheKeysDisjoint(t *testing.T) { + codec := &bintrieFlatCodec{} + merkle := &merkleFlatCodec{} + + // A 32-byte hash that, when passed to both codecs, would collide + // if the bintrie codec didn't prefix-disambiguate its cache keys. + hash := common.HexToHash("0xaabbccddeeff00112233445566778899aabbccddeeff00112233445566778899") + + binKey := codec.AccountCacheKey(hash) + merkleKey := merkle.AccountCacheKey(hash) + + if bytes.Equal(binKey, merkleKey) { + t.Fatalf("bintrie and merkle cache keys collided: both are %x", binKey) + } + if binKey[0] != bintrieCacheKeyPrefix { + t.Fatalf("bintrie cache key missing prefix byte: %x", binKey) + } +} + +// TestBintrieCodecSplitMarker verifies the single-tier marker handling. +// For merkle the marker is a two-tier (account, account+storage) pair; +// for bintrie it's a single 32-byte stem key, so SplitMarker returns +// the same slice twice. +func TestBintrieCodecSplitMarker(t *testing.T) { + codec := &bintrieFlatCodec{} + + // Nil marker. + acc, full := codec.SplitMarker(nil) + if acc != nil || full != nil { + t.Fatalf("nil marker: acc=%v full=%v, want nil/nil", acc, full) + } + + // A 32-byte marker. Both halves point to the same bytes. + marker := bytes.Repeat([]byte{0xAA}, 32) + acc, full = codec.SplitMarker(marker) + if !bytes.Equal(acc, marker) || !bytes.Equal(full, marker) { + t.Fatalf("SplitMarker: acc=%x full=%x, want both %x", acc, full, marker) + } +} diff --git a/triedb/pathdb/stem_blob.go b/triedb/pathdb/stem_blob.go new file mode 100644 index 0000000000..90aaa04c8f --- /dev/null +++ b/triedb/pathdb/stem_blob.go @@ -0,0 +1,327 @@ +// Copyright 2026 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package pathdb + +import ( + "errors" + "fmt" + "math/bits" + + "github.com/ethereum/go-ethereum/common" +) + +// Bintrie stem blob layout +// ------------------------ +// +// The flat-state representation of a bintrie stem packs the populated +// (offset, 32-byte value) pairs at that stem into a single on-disk blob. +// A stem holds up to 256 offsets (per EIP-7864, the full "stem group"), +// but in practice only a handful are populated for any given account +// (BasicData at offset 0, CodeHash at offset 1, a few storage slots, or +// code chunks). A dense encoding would waste 8 KB per stem; this layout +// scales linearly with the number of populated offsets. +// +// Layout: +// +// [ 0 .. 31 ] 32-byte bitmap; bit i set iff offset i has a value +// [32 .. 63 ] first populated offset's 32-byte value +// [64 .. 95 ] second populated offset's 32-byte value +// ... +// [32 + 32*(N-1) .. 32 + 32*N - 1] N-th populated offset's value +// +// where N = popcount(bitmap). Values appear in increasing offset order, +// which is the iteration order of the bitmap bits from least- to +// most-significant byte (byte 0 first, then byte 1, etc.), and within +// each byte from MSB (offset b*8) to LSB (offset b*8+7). +// +// An "absent" offset is one whose bitmap bit is clear; an offset whose +// value is 32 zero bytes is "present with zero value" — that is the +// tombstone convention used by BinaryTrie.DeleteStorage, which writes +// 32 zero bytes to mark a slot as cleared without removing it from the +// underlying StemNode's Values slice. +// +// An empty stem (all bits clear) is represented by a zero-length blob, +// and callers must delete the on-disk key rather than write a zero-length +// value. +const ( + stemBlobBitmapSize = 32 // bytes + stemBlobBitmapBits = stemBlobBitmapSize * 8 // 256 + stemBlobValueSize = common.HashLength // 32 +) + +// stemOffsetMax is the highest valid offset within a bintrie stem. +const stemOffsetMax = stemBlobBitmapBits - 1 // 255 + +var ( + errStemBlobTooShort = errors.New("stem blob shorter than bitmap") + errStemBlobMalformed = errors.New("stem blob length does not match bitmap popcount") + errStemBlobValueOutOfRange = errors.New("stem blob value slice out of range") +) + +// encodeStemBlob encodes a bitmap and a dense values slice (one entry per +// set bit, in ascending offset order) into the wire format described at +// the top of this file. +// +// The caller must ensure len(values) == popcount(bitmap) and that every +// entry in values has len == 32. If every bitmap bit is clear the function +// returns nil so the caller knows to delete the on-disk key. +func encodeStemBlob(bitmap [stemBlobBitmapSize]byte, values [][]byte) ([]byte, error) { + count := bitmapPopcount(bitmap) + if count != len(values) { + return nil, fmt.Errorf("stem blob popcount=%d values=%d: %w", count, len(values), errStemBlobMalformed) + } + if count == 0 { + return nil, nil + } + out := make([]byte, stemBlobBitmapSize+count*stemBlobValueSize) + copy(out, bitmap[:]) + for i, v := range values { + if len(v) != stemBlobValueSize { + return nil, fmt.Errorf("stem blob value %d has len %d: %w", i, len(v), errStemBlobMalformed) + } + copy(out[stemBlobBitmapSize+i*stemBlobValueSize:], v) + } + return out, nil +} + +// decodeStemBlob parses a raw stem blob into its bitmap and an ordered +// slice of populated 32-byte values. The returned values alias the input +// slice; callers must not retain or mutate them without copying first. +// +// A nil or zero-length blob decodes to a zero bitmap and no values +// (equivalent to "no offsets present"). +func decodeStemBlob(blob []byte) ([stemBlobBitmapSize]byte, [][]byte, error) { + var bitmap [stemBlobBitmapSize]byte + if len(blob) == 0 { + return bitmap, nil, nil + } + if len(blob) < stemBlobBitmapSize { + return bitmap, nil, errStemBlobTooShort + } + copy(bitmap[:], blob[:stemBlobBitmapSize]) + count := bitmapPopcount(bitmap) + expected := stemBlobBitmapSize + count*stemBlobValueSize + if len(blob) != expected { + return bitmap, nil, fmt.Errorf("stem blob len=%d popcount=%d expected=%d: %w", len(blob), count, expected, errStemBlobMalformed) + } + if count == 0 { + return bitmap, nil, nil + } + values := make([][]byte, count) + for i := range values { + start := stemBlobBitmapSize + i*stemBlobValueSize + values[i] = blob[start : start+stemBlobValueSize] + } + return bitmap, values, nil +} + +// extractStemOffset returns the 32-byte value at the given offset within +// a stem blob, or nil if the offset is not present. It does not allocate; +// the returned slice aliases the input blob and must not be mutated. +// +// Returns an error only if the blob itself is malformed. An absent offset +// in a well-formed blob is (nil, nil) — not an error. +func extractStemOffset(blob []byte, offset byte) ([]byte, error) { + if len(blob) == 0 { + return nil, nil + } + if len(blob) < stemBlobBitmapSize { + return nil, errStemBlobTooShort + } + var bitmap [stemBlobBitmapSize]byte + copy(bitmap[:], blob[:stemBlobBitmapSize]) + + // Is the offset present at all? + if !bitmapGet(bitmap, offset) { + return nil, nil + } + // Count how many set bits precede this offset to find the value slot. + idx := bitmapRank(bitmap, offset) + start := stemBlobBitmapSize + idx*stemBlobValueSize + end := start + stemBlobValueSize + if end > len(blob) { + return nil, errStemBlobValueOutOfRange + } + return blob[start:end], nil +} + +// stemBuilder accumulates (offset, value) pairs and produces a stem blob. +// It supports loading an existing blob, setting individual offsets, and +// emitting the final encoded form. +// +// Setting a value of nil or an empty slice clears the corresponding bit +// from the bitmap (the offset becomes "absent"). Setting a non-nil +// 32-byte slice — including 32 zero bytes — marks the offset present +// with that value. This preserves the distinction between absent and +// tombstoned-with-zero used elsewhere in the bintrie code. +// +// A stemBuilder is not safe for concurrent use. +type stemBuilder struct { + bitmap [stemBlobBitmapSize]byte + // values stores the current value at each offset, or nil if absent. + // Using a fixed 256-entry array avoids allocation churn as offsets + // are set and cleared. + values [stemBlobBitmapBits][]byte +} + +// newStemBuilder returns an empty stemBuilder. +func newStemBuilder() *stemBuilder { + return &stemBuilder{} +} + +// loadFromBlob merges the entries of the given stem blob into the builder. +// Existing entries at the same offsets are overwritten. An empty blob is +// a no-op. +func (b *stemBuilder) loadFromBlob(blob []byte) error { + if len(blob) == 0 { + return nil + } + bitmap, values, err := decodeStemBlob(blob) + if err != nil { + return err + } + // Walk the bitmap and copy each populated offset into the builder, + // stepping the values index in sync. + var vi int + for offset := range stemBlobBitmapBits { + if !bitmapGet(bitmap, byte(offset)) { + continue + } + // decodeStemBlob returns slices aliasing the input blob; we take + // an owning copy so the builder survives the caller mutating or + // releasing the source blob. + v := make([]byte, stemBlobValueSize) + copy(v, values[vi]) + b.values[offset] = v + b.bitmap[offset/8] |= 1 << (7 - uint(offset%8)) + vi++ + } + return nil +} + +// set writes value at the given offset. A nil or empty-length value +// clears the offset (bitmap bit cleared). A non-nil 32-byte value sets +// the offset present with that value. Setting with any other length +// panics — callers are expected to always pass 32-byte values. +func (b *stemBuilder) set(offset byte, value []byte) { + if len(value) == 0 { + b.values[offset] = nil + b.bitmap[offset/8] &^= 1 << (7 - uint(offset%8)) + return + } + if len(value) != stemBlobValueSize { + panic(fmt.Sprintf("stemBuilder: value at offset %d has len %d, want %d", offset, len(value), stemBlobValueSize)) + } + // Own the bytes so later caller mutations don't aliasing-surprise us. + owned := make([]byte, stemBlobValueSize) + copy(owned, value) + b.values[offset] = owned + b.bitmap[offset/8] |= 1 << (7 - uint(offset%8)) +} + +// empty reports whether no offsets are currently populated in the builder. +func (b *stemBuilder) empty() bool { + return bitmapPopcount(b.bitmap) == 0 +} + +// encode produces the stem blob encoding for the builder's current state. +// Returns nil for an empty builder so the caller can decide to delete the +// on-disk key rather than write a zero-length value. +func (b *stemBuilder) encode() []byte { + count := bitmapPopcount(b.bitmap) + if count == 0 { + return nil + } + out := make([]byte, stemBlobBitmapSize+count*stemBlobValueSize) + copy(out, b.bitmap[:]) + + // Walk the bitmap in ascending order, copying each populated value. + pos := stemBlobBitmapSize + for offset := range stemBlobBitmapBits { + if b.values[offset] == nil { + continue + } + copy(out[pos:], b.values[offset]) + pos += stemBlobValueSize + } + return out +} + +// reset clears all entries in the builder. +func (b *stemBuilder) reset() { + b.bitmap = [stemBlobBitmapSize]byte{} + b.values = [stemBlobBitmapBits][]byte{} +} + +// stemOffsetValue is a single (offset, value) pair passed to mergeStemBlob. +// A nil Value clears the offset. +type stemOffsetValue struct { + Offset byte + Value []byte +} + +// mergeStemBlob performs a read-modify-write on a stem blob: it decodes +// the existing blob (if any), applies the given writes in order, and +// returns a freshly encoded blob. Returns (nil, nil) when the result is +// empty — the caller should delete the on-disk key in that case. +func mergeStemBlob(existing []byte, writes []stemOffsetValue) ([]byte, error) { + b := newStemBuilder() + if err := b.loadFromBlob(existing); err != nil { + return nil, err + } + for _, w := range writes { + b.set(w.Offset, w.Value) + } + return b.encode(), nil +} + +// bitmapPopcount returns the number of set bits in the 32-byte bitmap. +func bitmapPopcount(bitmap [stemBlobBitmapSize]byte) int { + var n int + for _, b := range bitmap { + n += bits.OnesCount8(b) + } + return n +} + +// bitmapGet returns whether bit `offset` is set in the bitmap. The +// convention mirrors the bintrie: bit index `offset` lives in byte +// `offset/8`, with the MSB of that byte corresponding to the lowest +// in-byte offset (`offset%8 == 0`). +func bitmapGet(bitmap [stemBlobBitmapSize]byte, offset byte) bool { + return bitmap[offset/8]&(1<<(7-uint(offset%8))) != 0 +} + +// bitmapRank returns the number of set bits that come strictly before +// `offset` (in ascending offset order). The offset itself does not count. +func bitmapRank(bitmap [stemBlobBitmapSize]byte, offset byte) int { + // Full whole bytes before the target. + byteIdx := int(offset) / 8 + var rank int + for i := range byteIdx { + rank += bits.OnesCount8(bitmap[i]) + } + // Bits within the target byte that are above the target's bit. + bitIdx := offset % 8 + if bitIdx > 0 { + // The MSB is offset%8==0. We want bits 0..bitIdx-1 in that layout, + // which are the top bitIdx bits of the byte. + mask := byte(0xFF << (8 - bitIdx)) + rank += bits.OnesCount8(bitmap[byteIdx] & mask) + } + return rank +} diff --git a/triedb/pathdb/stem_blob_test.go b/triedb/pathdb/stem_blob_test.go new file mode 100644 index 0000000000..da57cf144f --- /dev/null +++ b/triedb/pathdb/stem_blob_test.go @@ -0,0 +1,361 @@ +// Copyright 2026 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package pathdb + +import ( + "bytes" + "testing" +) + +// mkval constructs a 32-byte value where the first byte is tag and the +// rest are zero. Used to make test assertions easy to read. +func mkval(tag byte) []byte { + v := make([]byte, stemBlobValueSize) + v[0] = tag + return v +} + +// TestStemBlobEmpty verifies that a builder with no entries encodes to +// nil (so callers delete the key) and decodes back to a zero bitmap and +// no values. +func TestStemBlobEmpty(t *testing.T) { + b := newStemBuilder() + if !b.empty() { + t.Fatal("fresh builder should be empty") + } + blob := b.encode() + if blob != nil { + t.Fatalf("empty builder should encode to nil, got %x", blob) + } + + // Decode nil and empty slice both yield an empty result. + for _, input := range [][]byte{nil, {}} { + bitmap, values, err := decodeStemBlob(input) + if err != nil { + t.Fatalf("decode empty: %v", err) + } + if values != nil { + t.Fatalf("decode empty values: got %v, want nil", values) + } + for i, b := range bitmap { + if b != 0 { + t.Fatalf("decode empty bitmap byte %d: got 0x%02x, want 0", i, b) + } + } + } +} + +// TestStemBlobBasicDataAndCodeHash verifies the "account header" encoding +// pattern: offsets 0 and 1 populated. This is the common case for every +// account update. +func TestStemBlobBasicDataAndCodeHash(t *testing.T) { + b := newStemBuilder() + basicData := mkval(0xAA) + codeHash := mkval(0xBB) + b.set(0, basicData) + b.set(1, codeHash) + + if b.empty() { + t.Fatal("builder should not be empty after two sets") + } + + blob := b.encode() + if blob == nil { + t.Fatal("encode should not return nil for populated builder") + } + if got, want := len(blob), stemBlobBitmapSize+2*stemBlobValueSize; got != want { + t.Fatalf("blob length: got %d, want %d", got, want) + } + + // Roundtrip through decodeStemBlob. + bitmap, values, err := decodeStemBlob(blob) + if err != nil { + t.Fatalf("decode: %v", err) + } + if got := bitmapPopcount(bitmap); got != 2 { + t.Fatalf("popcount: got %d, want 2", got) + } + if !bitmapGet(bitmap, 0) || !bitmapGet(bitmap, 1) { + t.Fatalf("bitmap missing offset 0 or 1: %x", bitmap) + } + if !bytes.Equal(values[0], basicData) { + t.Fatalf("value[0]: got %x, want %x", values[0], basicData) + } + if !bytes.Equal(values[1], codeHash) { + t.Fatalf("value[1]: got %x, want %x", values[1], codeHash) + } + + // Point reads via extractStemOffset. + got, err := extractStemOffset(blob, 0) + if err != nil { + t.Fatalf("extract offset 0: %v", err) + } + if !bytes.Equal(got, basicData) { + t.Fatalf("extract 0: got %x, want %x", got, basicData) + } + got, err = extractStemOffset(blob, 1) + if err != nil { + t.Fatalf("extract offset 1: %v", err) + } + if !bytes.Equal(got, codeHash) { + t.Fatalf("extract 1: got %x, want %x", got, codeHash) + } + // An unset offset returns (nil, nil). + got, err = extractStemOffset(blob, 42) + if err != nil { + t.Fatalf("extract unset offset: %v", err) + } + if got != nil { + t.Fatalf("extract unset: got %x, want nil", got) + } +} + +// TestStemBlobAllOffsets verifies that a fully-populated stem (all 256 +// offsets) encodes and decodes correctly. This is the worst-case size. +func TestStemBlobAllOffsets(t *testing.T) { + b := newStemBuilder() + for i := range stemBlobBitmapBits { + b.set(byte(i), mkval(byte(i))) + } + blob := b.encode() + expectedLen := stemBlobBitmapSize + stemBlobBitmapBits*stemBlobValueSize + if len(blob) != expectedLen { + t.Fatalf("blob length: got %d, want %d", len(blob), expectedLen) + } + + bitmap, _, err := decodeStemBlob(blob) + if err != nil { + t.Fatalf("decode: %v", err) + } + if bitmapPopcount(bitmap) != stemBlobBitmapBits { + t.Fatalf("popcount: got %d, want %d", bitmapPopcount(bitmap), stemBlobBitmapBits) + } + for i := range stemBlobBitmapBits { + got, err := extractStemOffset(blob, byte(i)) + if err != nil { + t.Fatalf("extract %d: %v", i, err) + } + if got[0] != byte(i) { + t.Fatalf("extract %d: tag 0x%02x, want 0x%02x", i, got[0], byte(i)) + } + } +} + +// TestStemBlobSparseHighOffsets verifies that non-contiguous offsets +// (typical for storage slots scattered across the stem) round-trip +// correctly. +func TestStemBlobSparseHighOffsets(t *testing.T) { + b := newStemBuilder() + offsets := []byte{3, 17, 64, 127, 128, 200, 255} + for _, o := range offsets { + b.set(o, mkval(o)) + } + blob := b.encode() + if len(blob) != stemBlobBitmapSize+len(offsets)*stemBlobValueSize { + t.Fatalf("unexpected blob length: %d", len(blob)) + } + + // Extract each and verify, including some absent offsets in between. + for _, o := range offsets { + got, err := extractStemOffset(blob, o) + if err != nil { + t.Fatalf("extract %d: %v", o, err) + } + if got[0] != o { + t.Fatalf("extract %d: tag 0x%02x, want 0x%02x", o, got[0], o) + } + } + // Spot-check absent offsets between populated ones. + for _, o := range []byte{0, 1, 2, 4, 18, 63, 126, 129, 199, 254} { + got, err := extractStemOffset(blob, o) + if err != nil { + t.Fatalf("extract absent %d: %v", o, err) + } + if got != nil { + t.Fatalf("extract absent %d: got %x, want nil", o, got) + } + } +} + +// TestStemBlobSetClearRoundtrip verifies that setting and then clearing +// an offset leaves the builder in the same state as never setting it. +func TestStemBlobSetClearRoundtrip(t *testing.T) { + b := newStemBuilder() + b.set(5, mkval(0xCD)) + if b.empty() { + t.Fatal("should not be empty after set") + } + b.set(5, nil) + if !b.empty() { + t.Fatal("should be empty after clearing the only entry") + } + if blob := b.encode(); blob != nil { + t.Fatalf("encode after clear: got %x, want nil", blob) + } +} + +// TestStemBlobLoadFromBlob verifies that an existing blob can be loaded +// into a fresh builder for read-modify-write semantics. +func TestStemBlobLoadFromBlob(t *testing.T) { + // Build an initial blob with two entries. + b1 := newStemBuilder() + b1.set(0, mkval(0x11)) + b1.set(64, mkval(0x22)) + initial := b1.encode() + + // Load into a fresh builder, modify, encode. + b2 := newStemBuilder() + if err := b2.loadFromBlob(initial); err != nil { + t.Fatalf("loadFromBlob: %v", err) + } + b2.set(0, mkval(0x33)) // overwrite offset 0 + b2.set(64, nil) // clear offset 64 + b2.set(128, mkval(0x44)) // add offset 128 + updated := b2.encode() + + // Offset 0 should have the new value. + got, err := extractStemOffset(updated, 0) + if err != nil || got == nil || got[0] != 0x33 { + t.Fatalf("offset 0 after update: got %x err=%v, want tag 0x33", got, err) + } + // Offset 64 should be absent. + got, err = extractStemOffset(updated, 64) + if err != nil { + t.Fatalf("offset 64 after clear: %v", err) + } + if got != nil { + t.Fatalf("offset 64 after clear: got %x, want nil", got) + } + // Offset 128 should have the new value. + got, err = extractStemOffset(updated, 128) + if err != nil || got == nil || got[0] != 0x44 { + t.Fatalf("offset 128 after update: got %x err=%v, want tag 0x44", got, err) + } +} + +// TestStemBlobMergeHelper verifies mergeStemBlob: read existing, apply +// writes, produce new blob in one call. +func TestStemBlobMergeHelper(t *testing.T) { + // Start with a blob containing offset 0. + b := newStemBuilder() + b.set(0, mkval(0x01)) + initial := b.encode() + + // Merge: overwrite 0, add 1, clear a non-existent offset (no-op). + result, err := mergeStemBlob(initial, []stemOffsetValue{ + {Offset: 0, Value: mkval(0x02)}, + {Offset: 1, Value: mkval(0x03)}, + {Offset: 100, Value: nil}, + }) + if err != nil { + t.Fatalf("merge: %v", err) + } + got, _ := extractStemOffset(result, 0) + if got == nil || got[0] != 0x02 { + t.Fatalf("merged offset 0: got %x, want tag 0x02", got) + } + got, _ = extractStemOffset(result, 1) + if got == nil || got[0] != 0x03 { + t.Fatalf("merged offset 1: got %x, want tag 0x03", got) + } +} + +// TestStemBlobMergeToEmpty verifies that clearing every populated entry +// via merge returns a nil blob (so the caller deletes the key). +func TestStemBlobMergeToEmpty(t *testing.T) { + b := newStemBuilder() + b.set(0, mkval(0x01)) + b.set(5, mkval(0x02)) + initial := b.encode() + + result, err := mergeStemBlob(initial, []stemOffsetValue{ + {Offset: 0, Value: nil}, + {Offset: 5, Value: nil}, + }) + if err != nil { + t.Fatalf("merge to empty: %v", err) + } + if result != nil { + t.Fatalf("merge to empty: got %x, want nil", result) + } +} + +// TestStemBlobTombstoneZeroBytes verifies that a 32-byte zero value is +// preserved as "present with zero value" — not confused with "absent". +// DeleteStorage uses this convention. +func TestStemBlobTombstoneZeroBytes(t *testing.T) { + b := newStemBuilder() + zeros := make([]byte, stemBlobValueSize) + b.set(64, zeros) + if b.empty() { + t.Fatal("zero-value entry should count as populated") + } + blob := b.encode() + got, err := extractStemOffset(blob, 64) + if err != nil { + t.Fatalf("extract tombstone: %v", err) + } + if !bytes.Equal(got, zeros) { + t.Fatalf("extract tombstone: got %x, want 32 zero bytes", got) + } +} + +// TestStemBlobMalformedInput verifies that decodeStemBlob detects +// malformed blobs with wrong lengths. +func TestStemBlobMalformedInput(t *testing.T) { + // Shorter than bitmap. + if _, _, err := decodeStemBlob(make([]byte, 10)); err == nil { + t.Fatal("expected error for too-short blob") + } + // Bitmap claims 2 entries but blob only has room for 1. + var bitmap [stemBlobBitmapSize]byte + bitmap[0] = 0xC0 // bits 0 and 1 set → 2 entries + short := make([]byte, stemBlobBitmapSize+stemBlobValueSize) + copy(short, bitmap[:]) + if _, _, err := decodeStemBlob(short); err == nil { + t.Fatal("expected error for blob shorter than bitmap implies") + } +} + +// TestBitmapRank sanity-checks the bit-to-index helper used by +// extractStemOffset for single-offset reads. +func TestBitmapRank(t *testing.T) { + var bitmap [stemBlobBitmapSize]byte + // Set bits at offsets 0, 1, 5, 64, 200. + for _, o := range []byte{0, 1, 5, 64, 200} { + bitmap[o/8] |= 1 << (7 - uint(o%8)) + } + cases := []struct { + offset byte + want int + }{ + {0, 0}, // first set bit is at index 0 + {1, 1}, // second set bit + {5, 2}, // third + {64, 3}, // fourth + {200, 4}, // fifth + // For an unset offset, rank returns the number of set bits < it. + {2, 2}, // bits 0 and 1 are before 2 + {100, 4}, // bits 0,1,5,64 are before 100 + {255, 5}, // all five bits are before 255 + } + for _, c := range cases { + if got := bitmapRank(bitmap, c.offset); got != c.want { + t.Errorf("bitmapRank(%d) = %d, want %d", c.offset, got, c.want) + } + } +}