core/state,triedb/pathdb: wire bintrie leaves through stateUpdate

Drains the binaryHasher's LeafProducer side-channel in StateDB.commit and
threads the stem writes through stateUpdate.encodeBinary into the pathdb
state set as per-offset accountData entries (key = stem||offset, value =
32-byte leaf or nil for clears).

The flat-state codec gains a Flush method that owns the in-memory→disk
write path, replacing the codec-agnostic per-entry loop in writeStates.
The merkle codec preserves its historical per-entry behavior verbatim;
the bintrie codec aggregates per-offset writes by stem so each stem hits
disk via a single read-modify-write, satisfying the codec's pre-aggregation
requirement and updating the clean cache with the merged blob it just
produced (no extra disk read).

stateUpdate.encodeBinary returns empty origin maps for the bintrie path:
state-history rollback for bintrie is deferred to a follow-up PR (see
BINTRIE_FLAT_STATE_REORG_GAP.md), and the diskLayer.revert path will
panic before consuming origins anyway.
This commit is contained in:
CPerezz 2026-04-07 22:35:24 +02:00
parent 29ef7576d9
commit a1ff36d9e1
No known key found for this signature in database
GPG key ID: 62045F34B97177DD
7 changed files with 456 additions and 105 deletions

View file

@ -382,3 +382,84 @@ func TestMerkleHasherNoLeafProducer(t *testing.T) {
t.Fatal("merkleHasher should NOT implement LeafProducer") t.Fatal("merkleHasher should NOT implement LeafProducer")
} }
} }
// TestStateUpdateEncodeBinaryFromLeaves verifies that stateUpdate.encodeBinary
// turns a slice of StemWrite values into the per-offset accountData map that
// pathdb's bintrie codec consumes. Three things matter:
//
// 1. Every leaf becomes one accountData entry, keyed by stem||offset.
// 2. nil-value leaves (account/storage deletes) become nil entries.
// 3. Non-nil leaves are deeply copied — encodeBinary must not retain
// pointers into the hasher's internal slab.
//
// storages/storageOrigin/accountOrigin remain empty: the bintrie path uses
// only accountData (per the layered-read design) and does not yet support
// state-history rollback.
func TestStateUpdateEncodeBinaryFromLeaves(t *testing.T) {
// Build a small leaves slice covering each kind of write the binary
// hasher emits: account update (BasicData + CodeHash), storage write,
// and a delete (nil value).
var (
stemA [bintrie.StemSize]byte
stemB [bintrie.StemSize]byte
)
for i := range stemA {
stemA[i] = byte(0x10 + i)
stemB[i] = byte(0xA0 + i)
}
basicDataValue := bytes.Repeat([]byte{0xAA}, 32)
codeHashValue := bytes.Repeat([]byte{0xBB}, 32)
storageValue := bytes.Repeat([]byte{0xCC}, 32)
leaves := []StemWrite{
// Account update at stemA: BasicData + CodeHash.
{Stem: stemA, Offset: bintrie.BasicDataLeafKey, Value: basicDataValue},
{Stem: stemA, Offset: bintrie.CodeHashLeafKey, Value: codeHashValue},
// Storage write at stemB.
{Stem: stemB, Offset: 7, Value: storageValue},
// Account delete at a third stem (nil values clear offsets 0+1).
{Stem: [bintrie.StemSize]byte{0xFF, 0xFF}, Offset: bintrie.BasicDataLeafKey, Value: nil},
{Stem: [bintrie.StemSize]byte{0xFF, 0xFF}, Offset: bintrie.CodeHashLeafKey, Value: nil},
}
su := &stateUpdate{leaves: leaves}
accounts, accountOrigin, storages, storageOrigin, err := su.encodeBinary()
if err != nil {
t.Fatalf("encodeBinary: %v", err)
}
if len(accounts) != len(leaves) {
t.Fatalf("accounts len = %d, want %d", len(accounts), len(leaves))
}
if len(storages) != 0 {
t.Errorf("storages should be empty for bintrie, got %d entries", len(storages))
}
if len(accountOrigin) != 0 || len(storageOrigin) != 0 {
t.Errorf("origin maps should be empty for bintrie")
}
// Check each leaf round-trips through the map under its full key.
for i, w := range leaves {
var fullKey common.Hash
copy(fullKey[:bintrie.StemSize], w.Stem[:])
fullKey[bintrie.StemSize] = w.Offset
got, ok := accounts[fullKey]
if !ok {
t.Errorf("leaf %d: missing key %x", i, fullKey)
continue
}
if w.Value == nil {
if got != nil {
t.Errorf("leaf %d: nil leaf became %x", i, got)
}
continue
}
if !bytes.Equal(got, w.Value) {
t.Errorf("leaf %d: got %x, want %x", i, got, w.Value)
}
// Aliasing check: the encoder must own its bytes.
if len(got) > 0 && &got[0] == &w.Value[0] {
t.Errorf("leaf %d: encodeBinary aliased the input slice", i)
}
}
}

View file

@ -1010,7 +1010,16 @@ func (s *StateDB) commit(deleteEmptyObjects bool, noStorageWiping bool, blockNum
builder.CollectWitness(s.witness) builder.CollectWitness(s.witness)
} }
} }
return newStateUpdate(noStorageWiping, origin, root, blockNumber, deletes, updates, nodes, secondaryHashes), nil // If the hasher tracks flat-state leaf production (currently only the
// binary hasher), drain the buffered stem writes so the downstream
// state update can carry them into the pathdb flat-state layer. Merkle
// hashers do not implement this interface and the call short-circuits
// to nil — newStateUpdate accepts nil as "no leaves".
var leaves []StemWrite
if producer, ok := s.hasher.(LeafProducer); ok {
leaves = producer.DrainStemWrites()
}
return newStateUpdate(noStorageWiping, origin, root, blockNumber, deletes, updates, nodes, secondaryHashes, leaves), nil
} }
// commitAndFlush is a wrapper of commit which also commits the state mutations // commitAndFlush is a wrapper of commit which also commits the state mutations

View file

@ -91,6 +91,14 @@ type stateUpdate struct {
codes map[common.Address]*contractCode // codes contains mutated contract codes, keyed by address. codes map[common.Address]*contractCode // codes contains mutated contract codes, keyed by address.
nodes *trienode.MergedNodeSet // nodes aggregates all dirty trie nodes produced by the update. nodes *trienode.MergedNodeSet // nodes aggregates all dirty trie nodes produced by the update.
secondaryHashes map[common.Address]Hashes // hashes of secondary tries secondaryHashes map[common.Address]Hashes // hashes of secondary tries
// leaves is the ordered list of stem-offset writes harvested from a
// LeafProducer-capable hasher (the binary hasher). For merkle hashers
// it is always nil; for the binary hasher it is the bintrie's view of
// the same state mutations the trie just absorbed, in flat-state form.
// encodeBinary turns this into the per-offset accountData map that
// pathdb's bintrie codec consumes at flush time.
leaves []StemWrite
} }
// empty returns a flag indicating the state transition is empty or not. // empty returns a flag indicating the state transition is empty or not.
@ -104,7 +112,11 @@ func (sc *stateUpdate) empty() bool {
// //
// rawStorageKey is a flag indicating whether to use the raw storage slot key or // rawStorageKey is a flag indicating whether to use the raw storage slot key or
// the hash of the slot key for constructing state update object. // the hash of the slot key for constructing state update object.
func newStateUpdate(rawStorageKey bool, originRoot common.Hash, root common.Hash, blockNumber uint64, deletes map[common.Hash]*accountDelete, updates map[common.Hash]*accountUpdate, nodes *trienode.MergedNodeSet, secondaryHashes map[common.Address]Hashes) *stateUpdate { //
// leaves carries the per-offset stem writes produced by a LeafProducer-capable
// hasher (the binary hasher). It is nil for merkle hashers and consumed by
// encodeBinary to populate the bintrie flat-state map.
func newStateUpdate(rawStorageKey bool, originRoot common.Hash, root common.Hash, blockNumber uint64, deletes map[common.Hash]*accountDelete, updates map[common.Hash]*accountUpdate, nodes *trienode.MergedNodeSet, secondaryHashes map[common.Address]Hashes, leaves []StemWrite) *stateUpdate {
var ( var (
accounts = make(map[common.Hash]*Account) accounts = make(map[common.Hash]*Account)
accountsOrigin = make(map[common.Address]*Account) accountsOrigin = make(map[common.Address]*Account)
@ -182,6 +194,7 @@ func newStateUpdate(rawStorageKey bool, originRoot common.Hash, root common.Hash
codes: codes, codes: codes,
nodes: nodes, nodes: nodes,
secondaryHashes: secondaryHashes, secondaryHashes: secondaryHashes,
leaves: leaves,
} }
} }
@ -250,49 +263,50 @@ func (sc *stateUpdate) encodeMerkle() (map[common.Hash][]byte, map[common.Addres
return accounts, accountOrigin, storages, storageOrigin, nil return accounts, accountOrigin, storages, storageOrigin, nil
} }
// encodeBinary produces the bintrie flat-state representation consumed by
// pathdb. Unlike encodeMerkle (which keys accounts/storage by keccak hashes
// and slim-RLP encodes the values), the bintrie path uses one entry per
// EIP-7864 leaf:
//
// key = stem(31B) || offset(1B), zero-padded into a common.Hash
// value = the 32-byte leaf payload, or nil to clear the offset
//
// Account header writes (BasicData at offset 0, CodeHash at offset 1) and
// storage slot / code chunk writes are uniform — the binary hasher emits
// each as a stemWrite via DrainStemWrites and we route every one of them
// into the accounts map. The storages map stays empty: bintrie has no
// per-account storage grouping at the flat-state layer, and pathdb's
// disklayer/lookup tree both work fine with a single accountData map of
// 32-byte keys.
//
// accountOrigin and storageOrigin are returned empty because state-history
// rollback for bintrie is deferred to a follow-up PR (see
// BINTRIE_FLAT_STATE_REORG_GAP.md). The pathdb layer's revertTo path
// panics for bintrie before it would observe these maps anyway.
func (sc *stateUpdate) encodeBinary() (map[common.Hash][]byte, map[common.Address][]byte, map[common.Hash]map[common.Hash][]byte, map[common.Address]map[common.Hash][]byte, error) { func (sc *stateUpdate) encodeBinary() (map[common.Hash][]byte, map[common.Address][]byte, map[common.Hash]map[common.Hash][]byte, map[common.Address]map[common.Hash][]byte, error) {
var ( var (
accounts = make(map[common.Hash][]byte) accounts = make(map[common.Hash][]byte, len(sc.leaves))
storages = make(map[common.Hash]map[common.Hash][]byte) storages = make(map[common.Hash]map[common.Hash][]byte)
accountOrigin = make(map[common.Address][]byte) accountOrigin = make(map[common.Address][]byte)
storageOrigin = make(map[common.Address]map[common.Hash][]byte) storageOrigin = make(map[common.Address]map[common.Hash][]byte)
) )
for addr, prev := range sc.accountsOrigin { for _, w := range sc.leaves {
if prev == nil { var fullKey common.Hash
accountOrigin[addr] = nil copy(fullKey[:len(w.Stem)], w.Stem[:])
} else { fullKey[len(w.Stem)] = w.Offset
accountOrigin[addr] = types.SlimAccountRLP(types.StateAccount{ // nil Value means "clear this offset" (account delete or storage
Balance: prev.Balance, // slot wipe). The pathdb codec interprets a nil entry as a delete
Nonce: prev.Nonce, // during flush, matching merkle's nil-blob convention.
CodeHash: prev.CodeHash, if w.Value == nil {
}) accounts[fullKey] = nil
continue
} }
// Take an owning copy: the hasher reuses its underlying buffers
addrHash := crypto.Keccak256Hash(addr.Bytes()) // across blocks, so retaining its slices would create cross-block
data := sc.accounts[addrHash] // aliasing bugs in the pathdb diff layer.
if data == nil { v := make([]byte, len(w.Value))
accounts[addrHash] = nil copy(v, w.Value)
} else { accounts[fullKey] = v
accounts[addrHash] = types.SlimAccountRLP(types.StateAccount{
Balance: data.Balance,
Nonce: data.Nonce,
CodeHash: data.CodeHash,
})
}
}
for addr, slots := range sc.storagesOrigin {
subset := make(map[common.Hash][]byte)
for key, val := range slots {
subset[key] = encodeSlot(val)
}
storageOrigin[addr] = subset
}
for addrHash, slots := range sc.storages {
subset := make(map[common.Hash][]byte)
for key, val := range slots {
subset[key] = encodeSlot(val)
}
storages[addrHash] = subset
} }
return accounts, accountOrigin, storages, storageOrigin, nil return accounts, accountOrigin, storages, storageOrigin, nil
} }

View file

@ -19,6 +19,7 @@ package pathdb
import ( import (
"bytes" "bytes"
"github.com/VictoriaMetrics/fastcache"
"github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/crypto"
@ -127,6 +128,23 @@ type flatStateCodec interface {
// marker. Returns the same semantics as bytes.Compare. Used by the // marker. Returns the same semantics as bytes.Compare. Used by the
// disklayer.account/storage gating logic and by writeStates. // disklayer.account/storage gating logic and by writeStates.
MarkerCompare(key []byte, marker []byte) int MarkerCompare(key []byte, marker []byte) int
// Flush drains all pending mutations from the in-memory accountData and
// storageData maps into the supplied batch and updates the clean cache
// in lockstep. The codec controls iteration order, key derivation, and
// any aggregation that may be required (e.g. the bintrie codec must
// merge per-offset writes into per-stem read-modify-writes to avoid
// quadratic disk reads).
//
// Entries strictly past genMarker (per the codec's MarkerCompare
// semantics) are skipped because they will be regenerated by the
// background snapshot generator.
//
// Returns (account-entry count, storage-entry count) for metric
// reporting; the merkle codec reports one per map entry, while the
// bintrie codec reports one per logical offset write (so the metrics
// remain comparable across schemes).
Flush(batch ethdb.Batch, genMarker []byte, accountData map[common.Hash][]byte, storageData map[common.Hash]map[common.Hash][]byte, clean *fastcache.Cache) (int, int)
} }
// merkleFlatCodec implements flatStateCodec for the keccak-keyed MPT flat // merkleFlatCodec implements flatStateCodec for the keccak-keyed MPT flat
@ -214,3 +232,72 @@ func (c *merkleFlatCodec) SplitMarker(marker []byte) ([]byte, []byte) {
func (c *merkleFlatCodec) MarkerCompare(key []byte, marker []byte) int { func (c *merkleFlatCodec) MarkerCompare(key []byte, marker []byte) int {
return bytes.Compare(key, marker) return bytes.Compare(key, marker)
} }
// Flush drains the supplied account/storage maps into the batch using the
// historical merkle per-entry layout: one rawdb write per accountData entry
// and one per storage slot. Entries past the genMarker are skipped (the
// generator will fill them in). The clean cache is kept in sync with each
// write so subsequent reads do not stale.
//
// This is the implementation that previously lived directly in writeStates.
// It has been moved into the codec so the bintrie codec can supply its own
// per-stem aggregating implementation alongside this one.
func (c *merkleFlatCodec) Flush(batch ethdb.Batch, genMarker []byte, accountData map[common.Hash][]byte, storageData map[common.Hash]map[common.Hash][]byte, clean *fastcache.Cache) (int, int) {
var (
accounts int
slots int
)
for addrHash, blob := range accountData {
// Skip any account not yet covered by the snapshot. The account
// at the generation marker position (addrHash == genMarker[:common.HashLength])
// should still be updated, as it would be skipped in the next
// generation cycle.
if genMarker != nil && bytes.Compare(addrHash[:], genMarker) > 0 {
continue
}
accounts++
cacheKey := c.AccountCacheKey(addrHash)
if len(blob) == 0 {
c.DeleteAccount(batch, addrHash)
if clean != nil {
clean.Set(cacheKey, nil)
}
} else {
c.WriteAccount(batch, addrHash, blob)
if clean != nil {
clean.Set(cacheKey, blob)
}
}
}
for addrHash, storages := range storageData {
// Skip any account not covered yet by the snapshot
if genMarker != nil && bytes.Compare(addrHash[:], genMarker) > 0 {
continue
}
midAccount := genMarker != nil && bytes.Equal(addrHash[:], genMarker[:common.HashLength])
for storageHash, blob := range storages {
// Skip any storage slot not yet covered by the snapshot. The storage slot
// at the generation marker position (addrHash == genMarker[:common.HashLength]
// and storageHash == genMarker[common.HashLength:]) should still be updated,
// as it would be skipped in the next generation cycle.
if midAccount && bytes.Compare(storageHash[:], genMarker[common.HashLength:]) > 0 {
continue
}
slots++
cacheKey := c.StorageCacheKey(addrHash, storageHash)
if len(blob) == 0 {
c.DeleteStorage(batch, addrHash, storageHash)
if clean != nil {
clean.Set(cacheKey, nil)
}
} else {
c.WriteStorage(batch, addrHash, storageHash, blob)
if clean != nil {
clean.Set(cacheKey, blob)
}
}
}
}
return accounts, slots
}

View file

@ -20,6 +20,7 @@ import (
"bytes" "bytes"
"fmt" "fmt"
"github.com/VictoriaMetrics/fastcache"
"github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/ethdb"
@ -236,22 +237,27 @@ func (c *bintrieFlatCodec) DeleteStorage(batch ethdb.Batch, _ common.Hash, stora
// Write/Delete methods to ensure the policy (nil value clears, empty // Write/Delete methods to ensure the policy (nil value clears, empty
// blob deletes) is consistent. // blob deletes) is consistent.
// //
// Returns the merged blob (or nil if the stem was deleted) so callers
// such as Flush can repopulate the clean cache without an extra disk
// read. The returned slice is freshly allocated and owned by the caller.
//
// Important: the read comes from c.db, NOT from the batch. A second // Important: the read comes from c.db, NOT from the batch. A second
// call for the same stem within a flush would re-read the pre-flush // call for the same stem within a flush would re-read the pre-flush
// state; see the pre-aggregation requirement documented on // state; see the pre-aggregation requirement documented on
// bintrieFlatCodec. // bintrieFlatCodec.
func (c *bintrieFlatCodec) applyWrites(batch ethdb.Batch, stem []byte, writes []stemOffsetValue) { func (c *bintrieFlatCodec) applyWrites(batch ethdb.Batch, stem []byte, writes []stemOffsetValue) []byte {
existing := rawdb.ReadBinTrieStem(c.db, stem) existing := rawdb.ReadBinTrieStem(c.db, stem)
merged, err := mergeStemBlob(existing, writes) merged, err := mergeStemBlob(existing, writes)
if err != nil { if err != nil {
crit("bintrie applyWrites: %v", err) crit("bintrie applyWrites: %v", err)
return return nil
} }
if merged == nil { if merged == nil {
rawdb.DeleteBinTrieStem(batch, stem) rawdb.DeleteBinTrieStem(batch, stem)
return return nil
} }
rawdb.WriteBinTrieStem(batch, stem, merged) rawdb.WriteBinTrieStem(batch, stem, merged)
return merged
} }
// splitAccountBlob validates and splits the two-slot account payload // splitAccountBlob validates and splits the two-slot account payload
@ -375,6 +381,94 @@ func (c *bintrieFlatCodec) MarkerCompare(key []byte, marker []byte) int {
return bytes.Compare(key, marker) return bytes.Compare(key, marker)
} }
// Flush drains the in-memory accountData and storageData maps into the
// batch using the bintrie per-stem layout. The maps are expected to hold
// per-offset entries — each key is a 32-byte (stem || offset) tuple
// produced by AccountKey/StorageKey, and each value is a 32-byte leaf
// (or nil to clear that offset).
//
// All entries are first grouped by stem, then a single
// read-modify-write is issued per stem so the codec touches each stem
// at most once during a flush. This is what allows the per-call
// pre-aggregation requirement documented on bintrieFlatCodec to be
// satisfied even when many writes target the same stem.
//
// storageData is also walked because higher-level callers may emit
// storage entries that the codec routes through the storage map for
// historical reasons; for the bintrie path, entries should normally
// arrive on accountData but we accept either layout.
//
// Returns (offset count from accountData, offset count from storageData)
// so the metric reporting in writeStates remains comparable to the
// merkle path. The clean cache is updated with the merged stem blob
// (one cache entry per stem, not per offset) — readers extract the
// requested offset on hit.
func (c *bintrieFlatCodec) Flush(batch ethdb.Batch, genMarker []byte, accountData map[common.Hash][]byte, storageData map[common.Hash]map[common.Hash][]byte, clean *fastcache.Cache) (int, int) {
// Aggregate per-offset writes into per-stem batches. We use [31]byte
// as the map key because bytes slices aren't hashable in Go and the
// stem itself is fixed size; the alternative (using common.Hash with
// a zero pad) would waste a byte per entry.
type aggregator struct {
writes []stemOffsetValue
}
aggregated := make(map[[bintrie.StemSize]byte]*aggregator)
addWrite := func(fullKey common.Hash, value []byte) {
var stem [bintrie.StemSize]byte
copy(stem[:], fullKey[:bintrie.StemSize])
offset := fullKey[bintrie.StemSize]
ag, exists := aggregated[stem]
if !exists {
ag = &aggregator{}
aggregated[stem] = ag
}
ag.writes = append(ag.writes, stemOffsetValue{Offset: offset, Value: value})
}
var (
accountWrites int
storageWrites int
)
for fullKey, value := range accountData {
// genMarker filtering: skip stems that the generator hasn't
// reached yet. We compare against the FULL key (stem || offset)
// because the bintrie marker is itself a 32-byte key.
if genMarker != nil && bytes.Compare(fullKey[:], genMarker) > 0 {
continue
}
accountWrites++
addWrite(fullKey, value)
}
for _, slots := range storageData {
for fullKey, value := range slots {
if genMarker != nil && bytes.Compare(fullKey[:], genMarker) > 0 {
continue
}
storageWrites++
addWrite(fullKey, value)
}
}
// Issue one RMW per stem and update the clean cache with the merged
// blob (or invalidate it if the stem was deleted).
for stem, ag := range aggregated {
merged := c.applyWrites(batch, stem[:], ag.writes)
if clean != nil {
// Reuse AccountCacheKey to derive the cache key — for
// bintrie this only depends on the stem so the trailing
// offset byte in the synthetic full key is irrelevant.
var fullKey common.Hash
copy(fullKey[:bintrie.StemSize], stem[:])
cacheKey := c.AccountCacheKey(fullKey)
if merged == nil {
clean.Set(cacheKey, nil)
} else {
clean.Set(cacheKey, merged)
}
}
}
return accountWrites, storageWrites
}
// crit is a shim around log.Crit that allows tests to replace the fatal // crit is a shim around log.Crit that allows tests to replace the fatal
// behavior with a panic if needed. Defined at the package level to match // behavior with a panic if needed. Defined at the package level to match
// the single-call-per-error style used by the merkle codec. // the single-call-per-error style used by the merkle codec.

View file

@ -265,3 +265,126 @@ func TestBintrieCodecSplitMarker(t *testing.T) {
t.Fatalf("SplitMarker: acc=%x full=%x, want both %x", acc, full, marker) t.Fatalf("SplitMarker: acc=%x full=%x, want both %x", acc, full, marker)
} }
} }
// TestBintrieCodecFlushAggregates verifies the per-stem aggregation that
// the codec's Flush method performs. Two distinct offsets at the SAME stem
// should produce a single on-disk stem blob containing both offsets after
// one Flush call — proving the codec collapses what would have been N
// read-modify-writes into one.
//
// Three offsets are written across two stems (2 + 1) so we exercise both
// the multi-offset and single-offset paths in a single test.
func TestBintrieCodecFlushAggregates(t *testing.T) {
codec, db := newTestBintrieCodec(t)
// Build a per-offset accountData map mimicking what encodeBinary
// produces from a binaryHasher.DrainStemWrites: the keys are full
// 32-byte (stem || offset) tuples and the values are 32-byte leaves.
addr := common.HexToAddress("0xCafeBabeDeadBeef00112233445566778899aabb")
stem := bintrie.GetBinaryTreeKey(addr, make([]byte, 32))[:bintrie.StemSize]
basicData := bytes.Repeat([]byte{0xAA}, stemBlobValueSize)
codeHash := bytes.Repeat([]byte{0xBB}, stemBlobValueSize)
storageVal := bytes.Repeat([]byte{0xCC}, stemBlobValueSize)
otherStem := bytes.Repeat([]byte{0x42}, bintrie.StemSize)
otherVal := bytes.Repeat([]byte{0xDD}, stemBlobValueSize)
mkKey := func(stem []byte, offset byte) common.Hash {
var k common.Hash
copy(k[:bintrie.StemSize], stem)
k[bintrie.StemSize] = offset
return k
}
accountData := map[common.Hash][]byte{
mkKey(stem, bintrie.BasicDataLeafKey): basicData,
mkKey(stem, bintrie.CodeHashLeafKey): codeHash,
mkKey(stem, 64): storageVal, // header storage slot
mkKey(otherStem, bintrie.BasicDataLeafKey): otherVal,
}
batch := db.NewBatch()
accW, stoW := codec.Flush(batch, nil, accountData, nil, nil)
flushBatch(t, batch)
if accW != 4 {
t.Errorf("account write count: got %d, want 4", accW)
}
if stoW != 0 {
t.Errorf("storage write count: got %d, want 0 (no storage map)", stoW)
}
// All three offsets at `stem` should be readable from a single on-disk
// blob; aggregation worked iff the second/third writes did not clobber
// the first.
blob := rawdb.ReadBinTrieStem(db, stem)
if len(blob) == 0 {
t.Fatal("stem blob missing after Flush")
}
for offset, want := range map[byte][]byte{
bintrie.BasicDataLeafKey: basicData,
bintrie.CodeHashLeafKey: codeHash,
64: storageVal,
} {
got, err := extractStemOffset(blob, offset)
if err != nil {
t.Fatalf("extract offset %d: %v", offset, err)
}
if !bytes.Equal(got, want) {
t.Errorf("offset %d: got %x, want %x", offset, got, want)
}
}
// The other stem should also have its single offset.
otherBlob := rawdb.ReadBinTrieStem(db, otherStem)
if got, _ := extractStemOffset(otherBlob, bintrie.BasicDataLeafKey); !bytes.Equal(got, otherVal) {
t.Errorf("other stem BasicData: got %x, want %x", got, otherVal)
}
}
// TestBintrieCodecFlushDelete verifies that nil-valued entries in the
// accountData map clear the corresponding offset, and that clearing every
// populated offset at a stem removes the on-disk key entirely (matching
// the per-call DeleteStorage semantics tested elsewhere).
func TestBintrieCodecFlushDelete(t *testing.T) {
codec, db := newTestBintrieCodec(t)
// Seed: write two offsets at one stem.
stem := bytes.Repeat([]byte{0x77}, bintrie.StemSize)
v0 := bytes.Repeat([]byte{0x01}, stemBlobValueSize)
v1 := bytes.Repeat([]byte{0x02}, stemBlobValueSize)
mkKey := func(offset byte) common.Hash {
var k common.Hash
copy(k[:bintrie.StemSize], stem)
k[bintrie.StemSize] = offset
return k
}
batch := db.NewBatch()
codec.Flush(batch, nil, map[common.Hash][]byte{
mkKey(0): v0,
mkKey(1): v1,
}, nil, nil)
flushBatch(t, batch)
// Now flush a nil for offset 0 — only offset 1 should remain.
batch = db.NewBatch()
codec.Flush(batch, nil, map[common.Hash][]byte{mkKey(0): nil}, nil, nil)
flushBatch(t, batch)
blob := rawdb.ReadBinTrieStem(db, stem)
if got, _ := extractStemOffset(blob, 0); got != nil {
t.Errorf("offset 0 should be cleared, got %x", got)
}
if got, _ := extractStemOffset(blob, 1); !bytes.Equal(got, v1) {
t.Errorf("offset 1 should survive, got %x want %x", got, v1)
}
// Clear the last remaining offset; the on-disk key should disappear.
batch = db.NewBatch()
codec.Flush(batch, nil, map[common.Hash][]byte{mkKey(1): nil}, nil, nil)
flushBatch(t, batch)
if raw := rawdb.ReadBinTrieStem(db, stem); raw != nil {
t.Errorf("stem should be deleted, got %x", raw)
}
}

View file

@ -17,8 +17,6 @@
package pathdb package pathdb
import ( import (
"bytes"
"github.com/VictoriaMetrics/fastcache" "github.com/VictoriaMetrics/fastcache"
"github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/rawdb"
@ -71,71 +69,16 @@ func writeNodes(batch ethdb.Batch, nodes map[common.Hash]map[string]*trienode.No
// This function assumes the background generator is already terminated and states // This function assumes the background generator is already terminated and states
// before the supplied marker has been correctly generated. // before the supplied marker has been correctly generated.
// //
// The codec parameter abstracts the trie-specific persistence and cache key // The codec parameter abstracts the trie-specific persistence: merkleFlatCodec
// derivation. The marker comparisons retain merkle-specific shape (two-tier // performs a per-entry rawdb write for each accountData/storageData entry,
// account+storage marker) because the bintrie path uses a separate writer // while bintrieFlatCodec aggregates per-offset writes into per-stem
// (writeStems, added in a later commit) that operates on a single-tier // read-modify-writes. Either way, the genMarker filtering, cache update, and
// marker over stems rather than (account, storage) pairs. // metric reporting all happen inside the codec — writeStates is just a thin
// dispatcher.
// //
// TODO(rjl493456442) do we really need this generation marker? The state updates // TODO(rjl493456442) do we really need this generation marker? The state updates
// after the marker can also be written and will be fixed by generator later if // after the marker can also be written and will be fixed by generator later if
// it's outdated. // it's outdated.
func writeStates(batch ethdb.Batch, codec flatStateCodec, genMarker []byte, accountData map[common.Hash][]byte, storageData map[common.Hash]map[common.Hash][]byte, clean *fastcache.Cache) (int, int) { func writeStates(batch ethdb.Batch, codec flatStateCodec, genMarker []byte, accountData map[common.Hash][]byte, storageData map[common.Hash]map[common.Hash][]byte, clean *fastcache.Cache) (int, int) {
var ( return codec.Flush(batch, genMarker, accountData, storageData, clean)
accounts int
slots int
)
for addrHash, blob := range accountData {
// Skip any account not yet covered by the snapshot. The account
// at the generation marker position (addrHash == genMarker[:common.HashLength])
// should still be updated, as it would be skipped in the next
// generation cycle.
if genMarker != nil && bytes.Compare(addrHash[:], genMarker) > 0 {
continue
}
accounts += 1
cacheKey := codec.AccountCacheKey(addrHash)
if len(blob) == 0 {
codec.DeleteAccount(batch, addrHash)
if clean != nil {
clean.Set(cacheKey, nil)
}
} else {
codec.WriteAccount(batch, addrHash, blob)
if clean != nil {
clean.Set(cacheKey, blob)
}
}
}
for addrHash, storages := range storageData {
// Skip any account not covered yet by the snapshot
if genMarker != nil && bytes.Compare(addrHash[:], genMarker) > 0 {
continue
}
midAccount := genMarker != nil && bytes.Equal(addrHash[:], genMarker[:common.HashLength])
for storageHash, blob := range storages {
// Skip any storage slot not yet covered by the snapshot. The storage slot
// at the generation marker position (addrHash == genMarker[:common.HashLength]
// and storageHash == genMarker[common.HashLength:]) should still be updated,
// as it would be skipped in the next generation cycle.
if midAccount && bytes.Compare(storageHash[:], genMarker[common.HashLength:]) > 0 {
continue
}
slots += 1
cacheKey := codec.StorageCacheKey(addrHash, storageHash)
if len(blob) == 0 {
codec.DeleteStorage(batch, addrHash, storageHash)
if clean != nil {
clean.Set(cacheKey, nil)
}
} else {
codec.WriteStorage(batch, addrHash, storageHash, blob)
if clean != nil {
clean.Set(cacheKey, blob)
}
}
}
}
return accounts, slots
} }