diff --git a/core/state/database_hasher_binary_test.go b/core/state/database_hasher_binary_test.go index 2be3d773a4..2a173e7a06 100644 --- a/core/state/database_hasher_binary_test.go +++ b/core/state/database_hasher_binary_test.go @@ -382,3 +382,84 @@ func TestMerkleHasherNoLeafProducer(t *testing.T) { t.Fatal("merkleHasher should NOT implement LeafProducer") } } + +// TestStateUpdateEncodeBinaryFromLeaves verifies that stateUpdate.encodeBinary +// turns a slice of StemWrite values into the per-offset accountData map that +// pathdb's bintrie codec consumes. Three things matter: +// +// 1. Every leaf becomes one accountData entry, keyed by stem||offset. +// 2. nil-value leaves (account/storage deletes) become nil entries. +// 3. Non-nil leaves are deeply copied — encodeBinary must not retain +// pointers into the hasher's internal slab. +// +// storages/storageOrigin/accountOrigin remain empty: the bintrie path uses +// only accountData (per the layered-read design) and does not yet support +// state-history rollback. +func TestStateUpdateEncodeBinaryFromLeaves(t *testing.T) { + // Build a small leaves slice covering each kind of write the binary + // hasher emits: account update (BasicData + CodeHash), storage write, + // and a delete (nil value). + var ( + stemA [bintrie.StemSize]byte + stemB [bintrie.StemSize]byte + ) + for i := range stemA { + stemA[i] = byte(0x10 + i) + stemB[i] = byte(0xA0 + i) + } + basicDataValue := bytes.Repeat([]byte{0xAA}, 32) + codeHashValue := bytes.Repeat([]byte{0xBB}, 32) + storageValue := bytes.Repeat([]byte{0xCC}, 32) + + leaves := []StemWrite{ + // Account update at stemA: BasicData + CodeHash. + {Stem: stemA, Offset: bintrie.BasicDataLeafKey, Value: basicDataValue}, + {Stem: stemA, Offset: bintrie.CodeHashLeafKey, Value: codeHashValue}, + // Storage write at stemB. + {Stem: stemB, Offset: 7, Value: storageValue}, + // Account delete at a third stem (nil values clear offsets 0+1). + {Stem: [bintrie.StemSize]byte{0xFF, 0xFF}, Offset: bintrie.BasicDataLeafKey, Value: nil}, + {Stem: [bintrie.StemSize]byte{0xFF, 0xFF}, Offset: bintrie.CodeHashLeafKey, Value: nil}, + } + + su := &stateUpdate{leaves: leaves} + accounts, accountOrigin, storages, storageOrigin, err := su.encodeBinary() + if err != nil { + t.Fatalf("encodeBinary: %v", err) + } + + if len(accounts) != len(leaves) { + t.Fatalf("accounts len = %d, want %d", len(accounts), len(leaves)) + } + if len(storages) != 0 { + t.Errorf("storages should be empty for bintrie, got %d entries", len(storages)) + } + if len(accountOrigin) != 0 || len(storageOrigin) != 0 { + t.Errorf("origin maps should be empty for bintrie") + } + + // Check each leaf round-trips through the map under its full key. + for i, w := range leaves { + var fullKey common.Hash + copy(fullKey[:bintrie.StemSize], w.Stem[:]) + fullKey[bintrie.StemSize] = w.Offset + got, ok := accounts[fullKey] + if !ok { + t.Errorf("leaf %d: missing key %x", i, fullKey) + continue + } + if w.Value == nil { + if got != nil { + t.Errorf("leaf %d: nil leaf became %x", i, got) + } + continue + } + if !bytes.Equal(got, w.Value) { + t.Errorf("leaf %d: got %x, want %x", i, got, w.Value) + } + // Aliasing check: the encoder must own its bytes. + if len(got) > 0 && &got[0] == &w.Value[0] { + t.Errorf("leaf %d: encodeBinary aliased the input slice", i) + } + } +} diff --git a/core/state/statedb.go b/core/state/statedb.go index e86a554508..df4a6ec215 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -1010,7 +1010,16 @@ func (s *StateDB) commit(deleteEmptyObjects bool, noStorageWiping bool, blockNum builder.CollectWitness(s.witness) } } - return newStateUpdate(noStorageWiping, origin, root, blockNumber, deletes, updates, nodes, secondaryHashes), nil + // If the hasher tracks flat-state leaf production (currently only the + // binary hasher), drain the buffered stem writes so the downstream + // state update can carry them into the pathdb flat-state layer. Merkle + // hashers do not implement this interface and the call short-circuits + // to nil — newStateUpdate accepts nil as "no leaves". + var leaves []StemWrite + if producer, ok := s.hasher.(LeafProducer); ok { + leaves = producer.DrainStemWrites() + } + return newStateUpdate(noStorageWiping, origin, root, blockNumber, deletes, updates, nodes, secondaryHashes, leaves), nil } // commitAndFlush is a wrapper of commit which also commits the state mutations diff --git a/core/state/stateupdate.go b/core/state/stateupdate.go index 7659e9ff18..44df4427df 100644 --- a/core/state/stateupdate.go +++ b/core/state/stateupdate.go @@ -91,6 +91,14 @@ type stateUpdate struct { codes map[common.Address]*contractCode // codes contains mutated contract codes, keyed by address. nodes *trienode.MergedNodeSet // nodes aggregates all dirty trie nodes produced by the update. secondaryHashes map[common.Address]Hashes // hashes of secondary tries + + // leaves is the ordered list of stem-offset writes harvested from a + // LeafProducer-capable hasher (the binary hasher). For merkle hashers + // it is always nil; for the binary hasher it is the bintrie's view of + // the same state mutations the trie just absorbed, in flat-state form. + // encodeBinary turns this into the per-offset accountData map that + // pathdb's bintrie codec consumes at flush time. + leaves []StemWrite } // empty returns a flag indicating the state transition is empty or not. @@ -104,7 +112,11 @@ func (sc *stateUpdate) empty() bool { // // rawStorageKey is a flag indicating whether to use the raw storage slot key or // the hash of the slot key for constructing state update object. -func newStateUpdate(rawStorageKey bool, originRoot common.Hash, root common.Hash, blockNumber uint64, deletes map[common.Hash]*accountDelete, updates map[common.Hash]*accountUpdate, nodes *trienode.MergedNodeSet, secondaryHashes map[common.Address]Hashes) *stateUpdate { +// +// leaves carries the per-offset stem writes produced by a LeafProducer-capable +// hasher (the binary hasher). It is nil for merkle hashers and consumed by +// encodeBinary to populate the bintrie flat-state map. +func newStateUpdate(rawStorageKey bool, originRoot common.Hash, root common.Hash, blockNumber uint64, deletes map[common.Hash]*accountDelete, updates map[common.Hash]*accountUpdate, nodes *trienode.MergedNodeSet, secondaryHashes map[common.Address]Hashes, leaves []StemWrite) *stateUpdate { var ( accounts = make(map[common.Hash]*Account) accountsOrigin = make(map[common.Address]*Account) @@ -182,6 +194,7 @@ func newStateUpdate(rawStorageKey bool, originRoot common.Hash, root common.Hash codes: codes, nodes: nodes, secondaryHashes: secondaryHashes, + leaves: leaves, } } @@ -250,49 +263,50 @@ func (sc *stateUpdate) encodeMerkle() (map[common.Hash][]byte, map[common.Addres return accounts, accountOrigin, storages, storageOrigin, nil } +// encodeBinary produces the bintrie flat-state representation consumed by +// pathdb. Unlike encodeMerkle (which keys accounts/storage by keccak hashes +// and slim-RLP encodes the values), the bintrie path uses one entry per +// EIP-7864 leaf: +// +// key = stem(31B) || offset(1B), zero-padded into a common.Hash +// value = the 32-byte leaf payload, or nil to clear the offset +// +// Account header writes (BasicData at offset 0, CodeHash at offset 1) and +// storage slot / code chunk writes are uniform — the binary hasher emits +// each as a stemWrite via DrainStemWrites and we route every one of them +// into the accounts map. The storages map stays empty: bintrie has no +// per-account storage grouping at the flat-state layer, and pathdb's +// disklayer/lookup tree both work fine with a single accountData map of +// 32-byte keys. +// +// accountOrigin and storageOrigin are returned empty because state-history +// rollback for bintrie is deferred to a follow-up PR (see +// BINTRIE_FLAT_STATE_REORG_GAP.md). The pathdb layer's revertTo path +// panics for bintrie before it would observe these maps anyway. func (sc *stateUpdate) encodeBinary() (map[common.Hash][]byte, map[common.Address][]byte, map[common.Hash]map[common.Hash][]byte, map[common.Address]map[common.Hash][]byte, error) { var ( - accounts = make(map[common.Hash][]byte) + accounts = make(map[common.Hash][]byte, len(sc.leaves)) storages = make(map[common.Hash]map[common.Hash][]byte) accountOrigin = make(map[common.Address][]byte) storageOrigin = make(map[common.Address]map[common.Hash][]byte) ) - for addr, prev := range sc.accountsOrigin { - if prev == nil { - accountOrigin[addr] = nil - } else { - accountOrigin[addr] = types.SlimAccountRLP(types.StateAccount{ - Balance: prev.Balance, - Nonce: prev.Nonce, - CodeHash: prev.CodeHash, - }) + for _, w := range sc.leaves { + var fullKey common.Hash + copy(fullKey[:len(w.Stem)], w.Stem[:]) + fullKey[len(w.Stem)] = w.Offset + // nil Value means "clear this offset" (account delete or storage + // slot wipe). The pathdb codec interprets a nil entry as a delete + // during flush, matching merkle's nil-blob convention. + if w.Value == nil { + accounts[fullKey] = nil + continue } - - addrHash := crypto.Keccak256Hash(addr.Bytes()) - data := sc.accounts[addrHash] - if data == nil { - accounts[addrHash] = nil - } else { - accounts[addrHash] = types.SlimAccountRLP(types.StateAccount{ - Balance: data.Balance, - Nonce: data.Nonce, - CodeHash: data.CodeHash, - }) - } - } - for addr, slots := range sc.storagesOrigin { - subset := make(map[common.Hash][]byte) - for key, val := range slots { - subset[key] = encodeSlot(val) - } - storageOrigin[addr] = subset - } - for addrHash, slots := range sc.storages { - subset := make(map[common.Hash][]byte) - for key, val := range slots { - subset[key] = encodeSlot(val) - } - storages[addrHash] = subset + // Take an owning copy: the hasher reuses its underlying buffers + // across blocks, so retaining its slices would create cross-block + // aliasing bugs in the pathdb diff layer. + v := make([]byte, len(w.Value)) + copy(v, w.Value) + accounts[fullKey] = v } return accounts, accountOrigin, storages, storageOrigin, nil } diff --git a/triedb/pathdb/flat_codec.go b/triedb/pathdb/flat_codec.go index ca0f72a381..d7f1e5c650 100644 --- a/triedb/pathdb/flat_codec.go +++ b/triedb/pathdb/flat_codec.go @@ -19,6 +19,7 @@ package pathdb import ( "bytes" + "github.com/VictoriaMetrics/fastcache" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/crypto" @@ -127,6 +128,23 @@ type flatStateCodec interface { // marker. Returns the same semantics as bytes.Compare. Used by the // disklayer.account/storage gating logic and by writeStates. MarkerCompare(key []byte, marker []byte) int + + // Flush drains all pending mutations from the in-memory accountData and + // storageData maps into the supplied batch and updates the clean cache + // in lockstep. The codec controls iteration order, key derivation, and + // any aggregation that may be required (e.g. the bintrie codec must + // merge per-offset writes into per-stem read-modify-writes to avoid + // quadratic disk reads). + // + // Entries strictly past genMarker (per the codec's MarkerCompare + // semantics) are skipped because they will be regenerated by the + // background snapshot generator. + // + // Returns (account-entry count, storage-entry count) for metric + // reporting; the merkle codec reports one per map entry, while the + // bintrie codec reports one per logical offset write (so the metrics + // remain comparable across schemes). + Flush(batch ethdb.Batch, genMarker []byte, accountData map[common.Hash][]byte, storageData map[common.Hash]map[common.Hash][]byte, clean *fastcache.Cache) (int, int) } // merkleFlatCodec implements flatStateCodec for the keccak-keyed MPT flat @@ -214,3 +232,72 @@ func (c *merkleFlatCodec) SplitMarker(marker []byte) ([]byte, []byte) { func (c *merkleFlatCodec) MarkerCompare(key []byte, marker []byte) int { return bytes.Compare(key, marker) } + +// Flush drains the supplied account/storage maps into the batch using the +// historical merkle per-entry layout: one rawdb write per accountData entry +// and one per storage slot. Entries past the genMarker are skipped (the +// generator will fill them in). The clean cache is kept in sync with each +// write so subsequent reads do not stale. +// +// This is the implementation that previously lived directly in writeStates. +// It has been moved into the codec so the bintrie codec can supply its own +// per-stem aggregating implementation alongside this one. +func (c *merkleFlatCodec) Flush(batch ethdb.Batch, genMarker []byte, accountData map[common.Hash][]byte, storageData map[common.Hash]map[common.Hash][]byte, clean *fastcache.Cache) (int, int) { + var ( + accounts int + slots int + ) + for addrHash, blob := range accountData { + // Skip any account not yet covered by the snapshot. The account + // at the generation marker position (addrHash == genMarker[:common.HashLength]) + // should still be updated, as it would be skipped in the next + // generation cycle. + if genMarker != nil && bytes.Compare(addrHash[:], genMarker) > 0 { + continue + } + accounts++ + cacheKey := c.AccountCacheKey(addrHash) + if len(blob) == 0 { + c.DeleteAccount(batch, addrHash) + if clean != nil { + clean.Set(cacheKey, nil) + } + } else { + c.WriteAccount(batch, addrHash, blob) + if clean != nil { + clean.Set(cacheKey, blob) + } + } + } + for addrHash, storages := range storageData { + // Skip any account not covered yet by the snapshot + if genMarker != nil && bytes.Compare(addrHash[:], genMarker) > 0 { + continue + } + midAccount := genMarker != nil && bytes.Equal(addrHash[:], genMarker[:common.HashLength]) + + for storageHash, blob := range storages { + // Skip any storage slot not yet covered by the snapshot. The storage slot + // at the generation marker position (addrHash == genMarker[:common.HashLength] + // and storageHash == genMarker[common.HashLength:]) should still be updated, + // as it would be skipped in the next generation cycle. + if midAccount && bytes.Compare(storageHash[:], genMarker[common.HashLength:]) > 0 { + continue + } + slots++ + cacheKey := c.StorageCacheKey(addrHash, storageHash) + if len(blob) == 0 { + c.DeleteStorage(batch, addrHash, storageHash) + if clean != nil { + clean.Set(cacheKey, nil) + } + } else { + c.WriteStorage(batch, addrHash, storageHash, blob) + if clean != nil { + clean.Set(cacheKey, blob) + } + } + } + } + return accounts, slots +} diff --git a/triedb/pathdb/flat_codec_bintrie.go b/triedb/pathdb/flat_codec_bintrie.go index d4d2bb565e..fe93a75e9e 100644 --- a/triedb/pathdb/flat_codec_bintrie.go +++ b/triedb/pathdb/flat_codec_bintrie.go @@ -20,6 +20,7 @@ import ( "bytes" "fmt" + "github.com/VictoriaMetrics/fastcache" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb" @@ -236,22 +237,27 @@ func (c *bintrieFlatCodec) DeleteStorage(batch ethdb.Batch, _ common.Hash, stora // Write/Delete methods to ensure the policy (nil value clears, empty // blob deletes) is consistent. // +// Returns the merged blob (or nil if the stem was deleted) so callers +// such as Flush can repopulate the clean cache without an extra disk +// read. The returned slice is freshly allocated and owned by the caller. +// // Important: the read comes from c.db, NOT from the batch. A second // call for the same stem within a flush would re-read the pre-flush // state; see the pre-aggregation requirement documented on // bintrieFlatCodec. -func (c *bintrieFlatCodec) applyWrites(batch ethdb.Batch, stem []byte, writes []stemOffsetValue) { +func (c *bintrieFlatCodec) applyWrites(batch ethdb.Batch, stem []byte, writes []stemOffsetValue) []byte { existing := rawdb.ReadBinTrieStem(c.db, stem) merged, err := mergeStemBlob(existing, writes) if err != nil { crit("bintrie applyWrites: %v", err) - return + return nil } if merged == nil { rawdb.DeleteBinTrieStem(batch, stem) - return + return nil } rawdb.WriteBinTrieStem(batch, stem, merged) + return merged } // splitAccountBlob validates and splits the two-slot account payload @@ -375,6 +381,94 @@ func (c *bintrieFlatCodec) MarkerCompare(key []byte, marker []byte) int { return bytes.Compare(key, marker) } +// Flush drains the in-memory accountData and storageData maps into the +// batch using the bintrie per-stem layout. The maps are expected to hold +// per-offset entries — each key is a 32-byte (stem || offset) tuple +// produced by AccountKey/StorageKey, and each value is a 32-byte leaf +// (or nil to clear that offset). +// +// All entries are first grouped by stem, then a single +// read-modify-write is issued per stem so the codec touches each stem +// at most once during a flush. This is what allows the per-call +// pre-aggregation requirement documented on bintrieFlatCodec to be +// satisfied even when many writes target the same stem. +// +// storageData is also walked because higher-level callers may emit +// storage entries that the codec routes through the storage map for +// historical reasons; for the bintrie path, entries should normally +// arrive on accountData but we accept either layout. +// +// Returns (offset count from accountData, offset count from storageData) +// so the metric reporting in writeStates remains comparable to the +// merkle path. The clean cache is updated with the merged stem blob +// (one cache entry per stem, not per offset) — readers extract the +// requested offset on hit. +func (c *bintrieFlatCodec) Flush(batch ethdb.Batch, genMarker []byte, accountData map[common.Hash][]byte, storageData map[common.Hash]map[common.Hash][]byte, clean *fastcache.Cache) (int, int) { + // Aggregate per-offset writes into per-stem batches. We use [31]byte + // as the map key because bytes slices aren't hashable in Go and the + // stem itself is fixed size; the alternative (using common.Hash with + // a zero pad) would waste a byte per entry. + type aggregator struct { + writes []stemOffsetValue + } + aggregated := make(map[[bintrie.StemSize]byte]*aggregator) + + addWrite := func(fullKey common.Hash, value []byte) { + var stem [bintrie.StemSize]byte + copy(stem[:], fullKey[:bintrie.StemSize]) + offset := fullKey[bintrie.StemSize] + ag, exists := aggregated[stem] + if !exists { + ag = &aggregator{} + aggregated[stem] = ag + } + ag.writes = append(ag.writes, stemOffsetValue{Offset: offset, Value: value}) + } + + var ( + accountWrites int + storageWrites int + ) + for fullKey, value := range accountData { + // genMarker filtering: skip stems that the generator hasn't + // reached yet. We compare against the FULL key (stem || offset) + // because the bintrie marker is itself a 32-byte key. + if genMarker != nil && bytes.Compare(fullKey[:], genMarker) > 0 { + continue + } + accountWrites++ + addWrite(fullKey, value) + } + for _, slots := range storageData { + for fullKey, value := range slots { + if genMarker != nil && bytes.Compare(fullKey[:], genMarker) > 0 { + continue + } + storageWrites++ + addWrite(fullKey, value) + } + } + // Issue one RMW per stem and update the clean cache with the merged + // blob (or invalidate it if the stem was deleted). + for stem, ag := range aggregated { + merged := c.applyWrites(batch, stem[:], ag.writes) + if clean != nil { + // Reuse AccountCacheKey to derive the cache key — for + // bintrie this only depends on the stem so the trailing + // offset byte in the synthetic full key is irrelevant. + var fullKey common.Hash + copy(fullKey[:bintrie.StemSize], stem[:]) + cacheKey := c.AccountCacheKey(fullKey) + if merged == nil { + clean.Set(cacheKey, nil) + } else { + clean.Set(cacheKey, merged) + } + } + } + return accountWrites, storageWrites +} + // crit is a shim around log.Crit that allows tests to replace the fatal // behavior with a panic if needed. Defined at the package level to match // the single-call-per-error style used by the merkle codec. diff --git a/triedb/pathdb/flat_codec_bintrie_test.go b/triedb/pathdb/flat_codec_bintrie_test.go index a55f211547..e5960ae03c 100644 --- a/triedb/pathdb/flat_codec_bintrie_test.go +++ b/triedb/pathdb/flat_codec_bintrie_test.go @@ -265,3 +265,126 @@ func TestBintrieCodecSplitMarker(t *testing.T) { t.Fatalf("SplitMarker: acc=%x full=%x, want both %x", acc, full, marker) } } + +// TestBintrieCodecFlushAggregates verifies the per-stem aggregation that +// the codec's Flush method performs. Two distinct offsets at the SAME stem +// should produce a single on-disk stem blob containing both offsets after +// one Flush call — proving the codec collapses what would have been N +// read-modify-writes into one. +// +// Three offsets are written across two stems (2 + 1) so we exercise both +// the multi-offset and single-offset paths in a single test. +func TestBintrieCodecFlushAggregates(t *testing.T) { + codec, db := newTestBintrieCodec(t) + + // Build a per-offset accountData map mimicking what encodeBinary + // produces from a binaryHasher.DrainStemWrites: the keys are full + // 32-byte (stem || offset) tuples and the values are 32-byte leaves. + addr := common.HexToAddress("0xCafeBabeDeadBeef00112233445566778899aabb") + stem := bintrie.GetBinaryTreeKey(addr, make([]byte, 32))[:bintrie.StemSize] + + basicData := bytes.Repeat([]byte{0xAA}, stemBlobValueSize) + codeHash := bytes.Repeat([]byte{0xBB}, stemBlobValueSize) + storageVal := bytes.Repeat([]byte{0xCC}, stemBlobValueSize) + otherStem := bytes.Repeat([]byte{0x42}, bintrie.StemSize) + otherVal := bytes.Repeat([]byte{0xDD}, stemBlobValueSize) + + mkKey := func(stem []byte, offset byte) common.Hash { + var k common.Hash + copy(k[:bintrie.StemSize], stem) + k[bintrie.StemSize] = offset + return k + } + accountData := map[common.Hash][]byte{ + mkKey(stem, bintrie.BasicDataLeafKey): basicData, + mkKey(stem, bintrie.CodeHashLeafKey): codeHash, + mkKey(stem, 64): storageVal, // header storage slot + mkKey(otherStem, bintrie.BasicDataLeafKey): otherVal, + } + + batch := db.NewBatch() + accW, stoW := codec.Flush(batch, nil, accountData, nil, nil) + flushBatch(t, batch) + + if accW != 4 { + t.Errorf("account write count: got %d, want 4", accW) + } + if stoW != 0 { + t.Errorf("storage write count: got %d, want 0 (no storage map)", stoW) + } + + // All three offsets at `stem` should be readable from a single on-disk + // blob; aggregation worked iff the second/third writes did not clobber + // the first. + blob := rawdb.ReadBinTrieStem(db, stem) + if len(blob) == 0 { + t.Fatal("stem blob missing after Flush") + } + for offset, want := range map[byte][]byte{ + bintrie.BasicDataLeafKey: basicData, + bintrie.CodeHashLeafKey: codeHash, + 64: storageVal, + } { + got, err := extractStemOffset(blob, offset) + if err != nil { + t.Fatalf("extract offset %d: %v", offset, err) + } + if !bytes.Equal(got, want) { + t.Errorf("offset %d: got %x, want %x", offset, got, want) + } + } + + // The other stem should also have its single offset. + otherBlob := rawdb.ReadBinTrieStem(db, otherStem) + if got, _ := extractStemOffset(otherBlob, bintrie.BasicDataLeafKey); !bytes.Equal(got, otherVal) { + t.Errorf("other stem BasicData: got %x, want %x", got, otherVal) + } +} + +// TestBintrieCodecFlushDelete verifies that nil-valued entries in the +// accountData map clear the corresponding offset, and that clearing every +// populated offset at a stem removes the on-disk key entirely (matching +// the per-call DeleteStorage semantics tested elsewhere). +func TestBintrieCodecFlushDelete(t *testing.T) { + codec, db := newTestBintrieCodec(t) + + // Seed: write two offsets at one stem. + stem := bytes.Repeat([]byte{0x77}, bintrie.StemSize) + v0 := bytes.Repeat([]byte{0x01}, stemBlobValueSize) + v1 := bytes.Repeat([]byte{0x02}, stemBlobValueSize) + + mkKey := func(offset byte) common.Hash { + var k common.Hash + copy(k[:bintrie.StemSize], stem) + k[bintrie.StemSize] = offset + return k + } + batch := db.NewBatch() + codec.Flush(batch, nil, map[common.Hash][]byte{ + mkKey(0): v0, + mkKey(1): v1, + }, nil, nil) + flushBatch(t, batch) + + // Now flush a nil for offset 0 — only offset 1 should remain. + batch = db.NewBatch() + codec.Flush(batch, nil, map[common.Hash][]byte{mkKey(0): nil}, nil, nil) + flushBatch(t, batch) + + blob := rawdb.ReadBinTrieStem(db, stem) + if got, _ := extractStemOffset(blob, 0); got != nil { + t.Errorf("offset 0 should be cleared, got %x", got) + } + if got, _ := extractStemOffset(blob, 1); !bytes.Equal(got, v1) { + t.Errorf("offset 1 should survive, got %x want %x", got, v1) + } + + // Clear the last remaining offset; the on-disk key should disappear. + batch = db.NewBatch() + codec.Flush(batch, nil, map[common.Hash][]byte{mkKey(1): nil}, nil, nil) + flushBatch(t, batch) + + if raw := rawdb.ReadBinTrieStem(db, stem); raw != nil { + t.Errorf("stem should be deleted, got %x", raw) + } +} diff --git a/triedb/pathdb/flush.go b/triedb/pathdb/flush.go index f0f56e482a..f2c8b6922b 100644 --- a/triedb/pathdb/flush.go +++ b/triedb/pathdb/flush.go @@ -17,8 +17,6 @@ package pathdb import ( - "bytes" - "github.com/VictoriaMetrics/fastcache" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" @@ -71,71 +69,16 @@ func writeNodes(batch ethdb.Batch, nodes map[common.Hash]map[string]*trienode.No // This function assumes the background generator is already terminated and states // before the supplied marker has been correctly generated. // -// The codec parameter abstracts the trie-specific persistence and cache key -// derivation. The marker comparisons retain merkle-specific shape (two-tier -// account+storage marker) because the bintrie path uses a separate writer -// (writeStems, added in a later commit) that operates on a single-tier -// marker over stems rather than (account, storage) pairs. +// The codec parameter abstracts the trie-specific persistence: merkleFlatCodec +// performs a per-entry rawdb write for each accountData/storageData entry, +// while bintrieFlatCodec aggregates per-offset writes into per-stem +// read-modify-writes. Either way, the genMarker filtering, cache update, and +// metric reporting all happen inside the codec — writeStates is just a thin +// dispatcher. // // TODO(rjl493456442) do we really need this generation marker? The state updates // after the marker can also be written and will be fixed by generator later if // it's outdated. func writeStates(batch ethdb.Batch, codec flatStateCodec, genMarker []byte, accountData map[common.Hash][]byte, storageData map[common.Hash]map[common.Hash][]byte, clean *fastcache.Cache) (int, int) { - var ( - accounts int - slots int - ) - for addrHash, blob := range accountData { - // Skip any account not yet covered by the snapshot. The account - // at the generation marker position (addrHash == genMarker[:common.HashLength]) - // should still be updated, as it would be skipped in the next - // generation cycle. - if genMarker != nil && bytes.Compare(addrHash[:], genMarker) > 0 { - continue - } - accounts += 1 - cacheKey := codec.AccountCacheKey(addrHash) - if len(blob) == 0 { - codec.DeleteAccount(batch, addrHash) - if clean != nil { - clean.Set(cacheKey, nil) - } - } else { - codec.WriteAccount(batch, addrHash, blob) - if clean != nil { - clean.Set(cacheKey, blob) - } - } - } - for addrHash, storages := range storageData { - // Skip any account not covered yet by the snapshot - if genMarker != nil && bytes.Compare(addrHash[:], genMarker) > 0 { - continue - } - midAccount := genMarker != nil && bytes.Equal(addrHash[:], genMarker[:common.HashLength]) - - for storageHash, blob := range storages { - // Skip any storage slot not yet covered by the snapshot. The storage slot - // at the generation marker position (addrHash == genMarker[:common.HashLength] - // and storageHash == genMarker[common.HashLength:]) should still be updated, - // as it would be skipped in the next generation cycle. - if midAccount && bytes.Compare(storageHash[:], genMarker[common.HashLength:]) > 0 { - continue - } - slots += 1 - cacheKey := codec.StorageCacheKey(addrHash, storageHash) - if len(blob) == 0 { - codec.DeleteStorage(batch, addrHash, storageHash) - if clean != nil { - clean.Set(cacheKey, nil) - } - } else { - codec.WriteStorage(batch, addrHash, storageHash, blob) - if clean != nil { - clean.Set(cacheKey, blob) - } - } - } - } - return accounts, slots + return codec.Flush(batch, genMarker, accountData, storageData, clean) }