mirror of
https://github.com/ethereum/go-ethereum.git
synced 2026-06-12 01:41:36 +00:00
core/state,triedb/pathdb: wire bintrie leaves through stateUpdate
Drains the binaryHasher's LeafProducer side-channel in StateDB.commit and threads the stem writes through stateUpdate.encodeBinary into the pathdb state set as per-offset accountData entries (key = stem||offset, value = 32-byte leaf or nil for clears). The flat-state codec gains a Flush method that owns the in-memory→disk write path, replacing the codec-agnostic per-entry loop in writeStates. The merkle codec preserves its historical per-entry behavior verbatim; the bintrie codec aggregates per-offset writes by stem so each stem hits disk via a single read-modify-write, satisfying the codec's pre-aggregation requirement and updating the clean cache with the merged blob it just produced (no extra disk read). stateUpdate.encodeBinary returns empty origin maps for the bintrie path: state-history rollback for bintrie is deferred to a follow-up PR (see BINTRIE_FLAT_STATE_REORG_GAP.md), and the diskLayer.revert path will panic before consuming origins anyway.
This commit is contained in:
parent
29ef7576d9
commit
a1ff36d9e1
7 changed files with 456 additions and 105 deletions
|
|
@ -382,3 +382,84 @@ func TestMerkleHasherNoLeafProducer(t *testing.T) {
|
|||
t.Fatal("merkleHasher should NOT implement LeafProducer")
|
||||
}
|
||||
}
|
||||
|
||||
// TestStateUpdateEncodeBinaryFromLeaves verifies that stateUpdate.encodeBinary
|
||||
// turns a slice of StemWrite values into the per-offset accountData map that
|
||||
// pathdb's bintrie codec consumes. Three things matter:
|
||||
//
|
||||
// 1. Every leaf becomes one accountData entry, keyed by stem||offset.
|
||||
// 2. nil-value leaves (account/storage deletes) become nil entries.
|
||||
// 3. Non-nil leaves are deeply copied — encodeBinary must not retain
|
||||
// pointers into the hasher's internal slab.
|
||||
//
|
||||
// storages/storageOrigin/accountOrigin remain empty: the bintrie path uses
|
||||
// only accountData (per the layered-read design) and does not yet support
|
||||
// state-history rollback.
|
||||
func TestStateUpdateEncodeBinaryFromLeaves(t *testing.T) {
|
||||
// Build a small leaves slice covering each kind of write the binary
|
||||
// hasher emits: account update (BasicData + CodeHash), storage write,
|
||||
// and a delete (nil value).
|
||||
var (
|
||||
stemA [bintrie.StemSize]byte
|
||||
stemB [bintrie.StemSize]byte
|
||||
)
|
||||
for i := range stemA {
|
||||
stemA[i] = byte(0x10 + i)
|
||||
stemB[i] = byte(0xA0 + i)
|
||||
}
|
||||
basicDataValue := bytes.Repeat([]byte{0xAA}, 32)
|
||||
codeHashValue := bytes.Repeat([]byte{0xBB}, 32)
|
||||
storageValue := bytes.Repeat([]byte{0xCC}, 32)
|
||||
|
||||
leaves := []StemWrite{
|
||||
// Account update at stemA: BasicData + CodeHash.
|
||||
{Stem: stemA, Offset: bintrie.BasicDataLeafKey, Value: basicDataValue},
|
||||
{Stem: stemA, Offset: bintrie.CodeHashLeafKey, Value: codeHashValue},
|
||||
// Storage write at stemB.
|
||||
{Stem: stemB, Offset: 7, Value: storageValue},
|
||||
// Account delete at a third stem (nil values clear offsets 0+1).
|
||||
{Stem: [bintrie.StemSize]byte{0xFF, 0xFF}, Offset: bintrie.BasicDataLeafKey, Value: nil},
|
||||
{Stem: [bintrie.StemSize]byte{0xFF, 0xFF}, Offset: bintrie.CodeHashLeafKey, Value: nil},
|
||||
}
|
||||
|
||||
su := &stateUpdate{leaves: leaves}
|
||||
accounts, accountOrigin, storages, storageOrigin, err := su.encodeBinary()
|
||||
if err != nil {
|
||||
t.Fatalf("encodeBinary: %v", err)
|
||||
}
|
||||
|
||||
if len(accounts) != len(leaves) {
|
||||
t.Fatalf("accounts len = %d, want %d", len(accounts), len(leaves))
|
||||
}
|
||||
if len(storages) != 0 {
|
||||
t.Errorf("storages should be empty for bintrie, got %d entries", len(storages))
|
||||
}
|
||||
if len(accountOrigin) != 0 || len(storageOrigin) != 0 {
|
||||
t.Errorf("origin maps should be empty for bintrie")
|
||||
}
|
||||
|
||||
// Check each leaf round-trips through the map under its full key.
|
||||
for i, w := range leaves {
|
||||
var fullKey common.Hash
|
||||
copy(fullKey[:bintrie.StemSize], w.Stem[:])
|
||||
fullKey[bintrie.StemSize] = w.Offset
|
||||
got, ok := accounts[fullKey]
|
||||
if !ok {
|
||||
t.Errorf("leaf %d: missing key %x", i, fullKey)
|
||||
continue
|
||||
}
|
||||
if w.Value == nil {
|
||||
if got != nil {
|
||||
t.Errorf("leaf %d: nil leaf became %x", i, got)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if !bytes.Equal(got, w.Value) {
|
||||
t.Errorf("leaf %d: got %x, want %x", i, got, w.Value)
|
||||
}
|
||||
// Aliasing check: the encoder must own its bytes.
|
||||
if len(got) > 0 && &got[0] == &w.Value[0] {
|
||||
t.Errorf("leaf %d: encodeBinary aliased the input slice", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1010,7 +1010,16 @@ func (s *StateDB) commit(deleteEmptyObjects bool, noStorageWiping bool, blockNum
|
|||
builder.CollectWitness(s.witness)
|
||||
}
|
||||
}
|
||||
return newStateUpdate(noStorageWiping, origin, root, blockNumber, deletes, updates, nodes, secondaryHashes), nil
|
||||
// If the hasher tracks flat-state leaf production (currently only the
|
||||
// binary hasher), drain the buffered stem writes so the downstream
|
||||
// state update can carry them into the pathdb flat-state layer. Merkle
|
||||
// hashers do not implement this interface and the call short-circuits
|
||||
// to nil — newStateUpdate accepts nil as "no leaves".
|
||||
var leaves []StemWrite
|
||||
if producer, ok := s.hasher.(LeafProducer); ok {
|
||||
leaves = producer.DrainStemWrites()
|
||||
}
|
||||
return newStateUpdate(noStorageWiping, origin, root, blockNumber, deletes, updates, nodes, secondaryHashes, leaves), nil
|
||||
}
|
||||
|
||||
// commitAndFlush is a wrapper of commit which also commits the state mutations
|
||||
|
|
|
|||
|
|
@ -91,6 +91,14 @@ type stateUpdate struct {
|
|||
codes map[common.Address]*contractCode // codes contains mutated contract codes, keyed by address.
|
||||
nodes *trienode.MergedNodeSet // nodes aggregates all dirty trie nodes produced by the update.
|
||||
secondaryHashes map[common.Address]Hashes // hashes of secondary tries
|
||||
|
||||
// leaves is the ordered list of stem-offset writes harvested from a
|
||||
// LeafProducer-capable hasher (the binary hasher). For merkle hashers
|
||||
// it is always nil; for the binary hasher it is the bintrie's view of
|
||||
// the same state mutations the trie just absorbed, in flat-state form.
|
||||
// encodeBinary turns this into the per-offset accountData map that
|
||||
// pathdb's bintrie codec consumes at flush time.
|
||||
leaves []StemWrite
|
||||
}
|
||||
|
||||
// empty returns a flag indicating the state transition is empty or not.
|
||||
|
|
@ -104,7 +112,11 @@ func (sc *stateUpdate) empty() bool {
|
|||
//
|
||||
// rawStorageKey is a flag indicating whether to use the raw storage slot key or
|
||||
// the hash of the slot key for constructing state update object.
|
||||
func newStateUpdate(rawStorageKey bool, originRoot common.Hash, root common.Hash, blockNumber uint64, deletes map[common.Hash]*accountDelete, updates map[common.Hash]*accountUpdate, nodes *trienode.MergedNodeSet, secondaryHashes map[common.Address]Hashes) *stateUpdate {
|
||||
//
|
||||
// leaves carries the per-offset stem writes produced by a LeafProducer-capable
|
||||
// hasher (the binary hasher). It is nil for merkle hashers and consumed by
|
||||
// encodeBinary to populate the bintrie flat-state map.
|
||||
func newStateUpdate(rawStorageKey bool, originRoot common.Hash, root common.Hash, blockNumber uint64, deletes map[common.Hash]*accountDelete, updates map[common.Hash]*accountUpdate, nodes *trienode.MergedNodeSet, secondaryHashes map[common.Address]Hashes, leaves []StemWrite) *stateUpdate {
|
||||
var (
|
||||
accounts = make(map[common.Hash]*Account)
|
||||
accountsOrigin = make(map[common.Address]*Account)
|
||||
|
|
@ -182,6 +194,7 @@ func newStateUpdate(rawStorageKey bool, originRoot common.Hash, root common.Hash
|
|||
codes: codes,
|
||||
nodes: nodes,
|
||||
secondaryHashes: secondaryHashes,
|
||||
leaves: leaves,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -250,49 +263,50 @@ func (sc *stateUpdate) encodeMerkle() (map[common.Hash][]byte, map[common.Addres
|
|||
return accounts, accountOrigin, storages, storageOrigin, nil
|
||||
}
|
||||
|
||||
// encodeBinary produces the bintrie flat-state representation consumed by
|
||||
// pathdb. Unlike encodeMerkle (which keys accounts/storage by keccak hashes
|
||||
// and slim-RLP encodes the values), the bintrie path uses one entry per
|
||||
// EIP-7864 leaf:
|
||||
//
|
||||
// key = stem(31B) || offset(1B), zero-padded into a common.Hash
|
||||
// value = the 32-byte leaf payload, or nil to clear the offset
|
||||
//
|
||||
// Account header writes (BasicData at offset 0, CodeHash at offset 1) and
|
||||
// storage slot / code chunk writes are uniform — the binary hasher emits
|
||||
// each as a stemWrite via DrainStemWrites and we route every one of them
|
||||
// into the accounts map. The storages map stays empty: bintrie has no
|
||||
// per-account storage grouping at the flat-state layer, and pathdb's
|
||||
// disklayer/lookup tree both work fine with a single accountData map of
|
||||
// 32-byte keys.
|
||||
//
|
||||
// accountOrigin and storageOrigin are returned empty because state-history
|
||||
// rollback for bintrie is deferred to a follow-up PR (see
|
||||
// BINTRIE_FLAT_STATE_REORG_GAP.md). The pathdb layer's revertTo path
|
||||
// panics for bintrie before it would observe these maps anyway.
|
||||
func (sc *stateUpdate) encodeBinary() (map[common.Hash][]byte, map[common.Address][]byte, map[common.Hash]map[common.Hash][]byte, map[common.Address]map[common.Hash][]byte, error) {
|
||||
var (
|
||||
accounts = make(map[common.Hash][]byte)
|
||||
accounts = make(map[common.Hash][]byte, len(sc.leaves))
|
||||
storages = make(map[common.Hash]map[common.Hash][]byte)
|
||||
accountOrigin = make(map[common.Address][]byte)
|
||||
storageOrigin = make(map[common.Address]map[common.Hash][]byte)
|
||||
)
|
||||
for addr, prev := range sc.accountsOrigin {
|
||||
if prev == nil {
|
||||
accountOrigin[addr] = nil
|
||||
} else {
|
||||
accountOrigin[addr] = types.SlimAccountRLP(types.StateAccount{
|
||||
Balance: prev.Balance,
|
||||
Nonce: prev.Nonce,
|
||||
CodeHash: prev.CodeHash,
|
||||
})
|
||||
for _, w := range sc.leaves {
|
||||
var fullKey common.Hash
|
||||
copy(fullKey[:len(w.Stem)], w.Stem[:])
|
||||
fullKey[len(w.Stem)] = w.Offset
|
||||
// nil Value means "clear this offset" (account delete or storage
|
||||
// slot wipe). The pathdb codec interprets a nil entry as a delete
|
||||
// during flush, matching merkle's nil-blob convention.
|
||||
if w.Value == nil {
|
||||
accounts[fullKey] = nil
|
||||
continue
|
||||
}
|
||||
|
||||
addrHash := crypto.Keccak256Hash(addr.Bytes())
|
||||
data := sc.accounts[addrHash]
|
||||
if data == nil {
|
||||
accounts[addrHash] = nil
|
||||
} else {
|
||||
accounts[addrHash] = types.SlimAccountRLP(types.StateAccount{
|
||||
Balance: data.Balance,
|
||||
Nonce: data.Nonce,
|
||||
CodeHash: data.CodeHash,
|
||||
})
|
||||
}
|
||||
}
|
||||
for addr, slots := range sc.storagesOrigin {
|
||||
subset := make(map[common.Hash][]byte)
|
||||
for key, val := range slots {
|
||||
subset[key] = encodeSlot(val)
|
||||
}
|
||||
storageOrigin[addr] = subset
|
||||
}
|
||||
for addrHash, slots := range sc.storages {
|
||||
subset := make(map[common.Hash][]byte)
|
||||
for key, val := range slots {
|
||||
subset[key] = encodeSlot(val)
|
||||
}
|
||||
storages[addrHash] = subset
|
||||
// Take an owning copy: the hasher reuses its underlying buffers
|
||||
// across blocks, so retaining its slices would create cross-block
|
||||
// aliasing bugs in the pathdb diff layer.
|
||||
v := make([]byte, len(w.Value))
|
||||
copy(v, w.Value)
|
||||
accounts[fullKey] = v
|
||||
}
|
||||
return accounts, accountOrigin, storages, storageOrigin, nil
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ package pathdb
|
|||
import (
|
||||
"bytes"
|
||||
|
||||
"github.com/VictoriaMetrics/fastcache"
|
||||
"github.com/ethereum/go-ethereum/common"
|
||||
"github.com/ethereum/go-ethereum/core/rawdb"
|
||||
"github.com/ethereum/go-ethereum/crypto"
|
||||
|
|
@ -127,6 +128,23 @@ type flatStateCodec interface {
|
|||
// marker. Returns the same semantics as bytes.Compare. Used by the
|
||||
// disklayer.account/storage gating logic and by writeStates.
|
||||
MarkerCompare(key []byte, marker []byte) int
|
||||
|
||||
// Flush drains all pending mutations from the in-memory accountData and
|
||||
// storageData maps into the supplied batch and updates the clean cache
|
||||
// in lockstep. The codec controls iteration order, key derivation, and
|
||||
// any aggregation that may be required (e.g. the bintrie codec must
|
||||
// merge per-offset writes into per-stem read-modify-writes to avoid
|
||||
// quadratic disk reads).
|
||||
//
|
||||
// Entries strictly past genMarker (per the codec's MarkerCompare
|
||||
// semantics) are skipped because they will be regenerated by the
|
||||
// background snapshot generator.
|
||||
//
|
||||
// Returns (account-entry count, storage-entry count) for metric
|
||||
// reporting; the merkle codec reports one per map entry, while the
|
||||
// bintrie codec reports one per logical offset write (so the metrics
|
||||
// remain comparable across schemes).
|
||||
Flush(batch ethdb.Batch, genMarker []byte, accountData map[common.Hash][]byte, storageData map[common.Hash]map[common.Hash][]byte, clean *fastcache.Cache) (int, int)
|
||||
}
|
||||
|
||||
// merkleFlatCodec implements flatStateCodec for the keccak-keyed MPT flat
|
||||
|
|
@ -214,3 +232,72 @@ func (c *merkleFlatCodec) SplitMarker(marker []byte) ([]byte, []byte) {
|
|||
func (c *merkleFlatCodec) MarkerCompare(key []byte, marker []byte) int {
|
||||
return bytes.Compare(key, marker)
|
||||
}
|
||||
|
||||
// Flush drains the supplied account/storage maps into the batch using the
|
||||
// historical merkle per-entry layout: one rawdb write per accountData entry
|
||||
// and one per storage slot. Entries past the genMarker are skipped (the
|
||||
// generator will fill them in). The clean cache is kept in sync with each
|
||||
// write so subsequent reads do not stale.
|
||||
//
|
||||
// This is the implementation that previously lived directly in writeStates.
|
||||
// It has been moved into the codec so the bintrie codec can supply its own
|
||||
// per-stem aggregating implementation alongside this one.
|
||||
func (c *merkleFlatCodec) Flush(batch ethdb.Batch, genMarker []byte, accountData map[common.Hash][]byte, storageData map[common.Hash]map[common.Hash][]byte, clean *fastcache.Cache) (int, int) {
|
||||
var (
|
||||
accounts int
|
||||
slots int
|
||||
)
|
||||
for addrHash, blob := range accountData {
|
||||
// Skip any account not yet covered by the snapshot. The account
|
||||
// at the generation marker position (addrHash == genMarker[:common.HashLength])
|
||||
// should still be updated, as it would be skipped in the next
|
||||
// generation cycle.
|
||||
if genMarker != nil && bytes.Compare(addrHash[:], genMarker) > 0 {
|
||||
continue
|
||||
}
|
||||
accounts++
|
||||
cacheKey := c.AccountCacheKey(addrHash)
|
||||
if len(blob) == 0 {
|
||||
c.DeleteAccount(batch, addrHash)
|
||||
if clean != nil {
|
||||
clean.Set(cacheKey, nil)
|
||||
}
|
||||
} else {
|
||||
c.WriteAccount(batch, addrHash, blob)
|
||||
if clean != nil {
|
||||
clean.Set(cacheKey, blob)
|
||||
}
|
||||
}
|
||||
}
|
||||
for addrHash, storages := range storageData {
|
||||
// Skip any account not covered yet by the snapshot
|
||||
if genMarker != nil && bytes.Compare(addrHash[:], genMarker) > 0 {
|
||||
continue
|
||||
}
|
||||
midAccount := genMarker != nil && bytes.Equal(addrHash[:], genMarker[:common.HashLength])
|
||||
|
||||
for storageHash, blob := range storages {
|
||||
// Skip any storage slot not yet covered by the snapshot. The storage slot
|
||||
// at the generation marker position (addrHash == genMarker[:common.HashLength]
|
||||
// and storageHash == genMarker[common.HashLength:]) should still be updated,
|
||||
// as it would be skipped in the next generation cycle.
|
||||
if midAccount && bytes.Compare(storageHash[:], genMarker[common.HashLength:]) > 0 {
|
||||
continue
|
||||
}
|
||||
slots++
|
||||
cacheKey := c.StorageCacheKey(addrHash, storageHash)
|
||||
if len(blob) == 0 {
|
||||
c.DeleteStorage(batch, addrHash, storageHash)
|
||||
if clean != nil {
|
||||
clean.Set(cacheKey, nil)
|
||||
}
|
||||
} else {
|
||||
c.WriteStorage(batch, addrHash, storageHash, blob)
|
||||
if clean != nil {
|
||||
clean.Set(cacheKey, blob)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return accounts, slots
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ import (
|
|||
"bytes"
|
||||
"fmt"
|
||||
|
||||
"github.com/VictoriaMetrics/fastcache"
|
||||
"github.com/ethereum/go-ethereum/common"
|
||||
"github.com/ethereum/go-ethereum/core/rawdb"
|
||||
"github.com/ethereum/go-ethereum/ethdb"
|
||||
|
|
@ -236,22 +237,27 @@ func (c *bintrieFlatCodec) DeleteStorage(batch ethdb.Batch, _ common.Hash, stora
|
|||
// Write/Delete methods to ensure the policy (nil value clears, empty
|
||||
// blob deletes) is consistent.
|
||||
//
|
||||
// Returns the merged blob (or nil if the stem was deleted) so callers
|
||||
// such as Flush can repopulate the clean cache without an extra disk
|
||||
// read. The returned slice is freshly allocated and owned by the caller.
|
||||
//
|
||||
// Important: the read comes from c.db, NOT from the batch. A second
|
||||
// call for the same stem within a flush would re-read the pre-flush
|
||||
// state; see the pre-aggregation requirement documented on
|
||||
// bintrieFlatCodec.
|
||||
func (c *bintrieFlatCodec) applyWrites(batch ethdb.Batch, stem []byte, writes []stemOffsetValue) {
|
||||
func (c *bintrieFlatCodec) applyWrites(batch ethdb.Batch, stem []byte, writes []stemOffsetValue) []byte {
|
||||
existing := rawdb.ReadBinTrieStem(c.db, stem)
|
||||
merged, err := mergeStemBlob(existing, writes)
|
||||
if err != nil {
|
||||
crit("bintrie applyWrites: %v", err)
|
||||
return
|
||||
return nil
|
||||
}
|
||||
if merged == nil {
|
||||
rawdb.DeleteBinTrieStem(batch, stem)
|
||||
return
|
||||
return nil
|
||||
}
|
||||
rawdb.WriteBinTrieStem(batch, stem, merged)
|
||||
return merged
|
||||
}
|
||||
|
||||
// splitAccountBlob validates and splits the two-slot account payload
|
||||
|
|
@ -375,6 +381,94 @@ func (c *bintrieFlatCodec) MarkerCompare(key []byte, marker []byte) int {
|
|||
return bytes.Compare(key, marker)
|
||||
}
|
||||
|
||||
// Flush drains the in-memory accountData and storageData maps into the
|
||||
// batch using the bintrie per-stem layout. The maps are expected to hold
|
||||
// per-offset entries — each key is a 32-byte (stem || offset) tuple
|
||||
// produced by AccountKey/StorageKey, and each value is a 32-byte leaf
|
||||
// (or nil to clear that offset).
|
||||
//
|
||||
// All entries are first grouped by stem, then a single
|
||||
// read-modify-write is issued per stem so the codec touches each stem
|
||||
// at most once during a flush. This is what allows the per-call
|
||||
// pre-aggregation requirement documented on bintrieFlatCodec to be
|
||||
// satisfied even when many writes target the same stem.
|
||||
//
|
||||
// storageData is also walked because higher-level callers may emit
|
||||
// storage entries that the codec routes through the storage map for
|
||||
// historical reasons; for the bintrie path, entries should normally
|
||||
// arrive on accountData but we accept either layout.
|
||||
//
|
||||
// Returns (offset count from accountData, offset count from storageData)
|
||||
// so the metric reporting in writeStates remains comparable to the
|
||||
// merkle path. The clean cache is updated with the merged stem blob
|
||||
// (one cache entry per stem, not per offset) — readers extract the
|
||||
// requested offset on hit.
|
||||
func (c *bintrieFlatCodec) Flush(batch ethdb.Batch, genMarker []byte, accountData map[common.Hash][]byte, storageData map[common.Hash]map[common.Hash][]byte, clean *fastcache.Cache) (int, int) {
|
||||
// Aggregate per-offset writes into per-stem batches. We use [31]byte
|
||||
// as the map key because bytes slices aren't hashable in Go and the
|
||||
// stem itself is fixed size; the alternative (using common.Hash with
|
||||
// a zero pad) would waste a byte per entry.
|
||||
type aggregator struct {
|
||||
writes []stemOffsetValue
|
||||
}
|
||||
aggregated := make(map[[bintrie.StemSize]byte]*aggregator)
|
||||
|
||||
addWrite := func(fullKey common.Hash, value []byte) {
|
||||
var stem [bintrie.StemSize]byte
|
||||
copy(stem[:], fullKey[:bintrie.StemSize])
|
||||
offset := fullKey[bintrie.StemSize]
|
||||
ag, exists := aggregated[stem]
|
||||
if !exists {
|
||||
ag = &aggregator{}
|
||||
aggregated[stem] = ag
|
||||
}
|
||||
ag.writes = append(ag.writes, stemOffsetValue{Offset: offset, Value: value})
|
||||
}
|
||||
|
||||
var (
|
||||
accountWrites int
|
||||
storageWrites int
|
||||
)
|
||||
for fullKey, value := range accountData {
|
||||
// genMarker filtering: skip stems that the generator hasn't
|
||||
// reached yet. We compare against the FULL key (stem || offset)
|
||||
// because the bintrie marker is itself a 32-byte key.
|
||||
if genMarker != nil && bytes.Compare(fullKey[:], genMarker) > 0 {
|
||||
continue
|
||||
}
|
||||
accountWrites++
|
||||
addWrite(fullKey, value)
|
||||
}
|
||||
for _, slots := range storageData {
|
||||
for fullKey, value := range slots {
|
||||
if genMarker != nil && bytes.Compare(fullKey[:], genMarker) > 0 {
|
||||
continue
|
||||
}
|
||||
storageWrites++
|
||||
addWrite(fullKey, value)
|
||||
}
|
||||
}
|
||||
// Issue one RMW per stem and update the clean cache with the merged
|
||||
// blob (or invalidate it if the stem was deleted).
|
||||
for stem, ag := range aggregated {
|
||||
merged := c.applyWrites(batch, stem[:], ag.writes)
|
||||
if clean != nil {
|
||||
// Reuse AccountCacheKey to derive the cache key — for
|
||||
// bintrie this only depends on the stem so the trailing
|
||||
// offset byte in the synthetic full key is irrelevant.
|
||||
var fullKey common.Hash
|
||||
copy(fullKey[:bintrie.StemSize], stem[:])
|
||||
cacheKey := c.AccountCacheKey(fullKey)
|
||||
if merged == nil {
|
||||
clean.Set(cacheKey, nil)
|
||||
} else {
|
||||
clean.Set(cacheKey, merged)
|
||||
}
|
||||
}
|
||||
}
|
||||
return accountWrites, storageWrites
|
||||
}
|
||||
|
||||
// crit is a shim around log.Crit that allows tests to replace the fatal
|
||||
// behavior with a panic if needed. Defined at the package level to match
|
||||
// the single-call-per-error style used by the merkle codec.
|
||||
|
|
|
|||
|
|
@ -265,3 +265,126 @@ func TestBintrieCodecSplitMarker(t *testing.T) {
|
|||
t.Fatalf("SplitMarker: acc=%x full=%x, want both %x", acc, full, marker)
|
||||
}
|
||||
}
|
||||
|
||||
// TestBintrieCodecFlushAggregates verifies the per-stem aggregation that
|
||||
// the codec's Flush method performs. Two distinct offsets at the SAME stem
|
||||
// should produce a single on-disk stem blob containing both offsets after
|
||||
// one Flush call — proving the codec collapses what would have been N
|
||||
// read-modify-writes into one.
|
||||
//
|
||||
// Three offsets are written across two stems (2 + 1) so we exercise both
|
||||
// the multi-offset and single-offset paths in a single test.
|
||||
func TestBintrieCodecFlushAggregates(t *testing.T) {
|
||||
codec, db := newTestBintrieCodec(t)
|
||||
|
||||
// Build a per-offset accountData map mimicking what encodeBinary
|
||||
// produces from a binaryHasher.DrainStemWrites: the keys are full
|
||||
// 32-byte (stem || offset) tuples and the values are 32-byte leaves.
|
||||
addr := common.HexToAddress("0xCafeBabeDeadBeef00112233445566778899aabb")
|
||||
stem := bintrie.GetBinaryTreeKey(addr, make([]byte, 32))[:bintrie.StemSize]
|
||||
|
||||
basicData := bytes.Repeat([]byte{0xAA}, stemBlobValueSize)
|
||||
codeHash := bytes.Repeat([]byte{0xBB}, stemBlobValueSize)
|
||||
storageVal := bytes.Repeat([]byte{0xCC}, stemBlobValueSize)
|
||||
otherStem := bytes.Repeat([]byte{0x42}, bintrie.StemSize)
|
||||
otherVal := bytes.Repeat([]byte{0xDD}, stemBlobValueSize)
|
||||
|
||||
mkKey := func(stem []byte, offset byte) common.Hash {
|
||||
var k common.Hash
|
||||
copy(k[:bintrie.StemSize], stem)
|
||||
k[bintrie.StemSize] = offset
|
||||
return k
|
||||
}
|
||||
accountData := map[common.Hash][]byte{
|
||||
mkKey(stem, bintrie.BasicDataLeafKey): basicData,
|
||||
mkKey(stem, bintrie.CodeHashLeafKey): codeHash,
|
||||
mkKey(stem, 64): storageVal, // header storage slot
|
||||
mkKey(otherStem, bintrie.BasicDataLeafKey): otherVal,
|
||||
}
|
||||
|
||||
batch := db.NewBatch()
|
||||
accW, stoW := codec.Flush(batch, nil, accountData, nil, nil)
|
||||
flushBatch(t, batch)
|
||||
|
||||
if accW != 4 {
|
||||
t.Errorf("account write count: got %d, want 4", accW)
|
||||
}
|
||||
if stoW != 0 {
|
||||
t.Errorf("storage write count: got %d, want 0 (no storage map)", stoW)
|
||||
}
|
||||
|
||||
// All three offsets at `stem` should be readable from a single on-disk
|
||||
// blob; aggregation worked iff the second/third writes did not clobber
|
||||
// the first.
|
||||
blob := rawdb.ReadBinTrieStem(db, stem)
|
||||
if len(blob) == 0 {
|
||||
t.Fatal("stem blob missing after Flush")
|
||||
}
|
||||
for offset, want := range map[byte][]byte{
|
||||
bintrie.BasicDataLeafKey: basicData,
|
||||
bintrie.CodeHashLeafKey: codeHash,
|
||||
64: storageVal,
|
||||
} {
|
||||
got, err := extractStemOffset(blob, offset)
|
||||
if err != nil {
|
||||
t.Fatalf("extract offset %d: %v", offset, err)
|
||||
}
|
||||
if !bytes.Equal(got, want) {
|
||||
t.Errorf("offset %d: got %x, want %x", offset, got, want)
|
||||
}
|
||||
}
|
||||
|
||||
// The other stem should also have its single offset.
|
||||
otherBlob := rawdb.ReadBinTrieStem(db, otherStem)
|
||||
if got, _ := extractStemOffset(otherBlob, bintrie.BasicDataLeafKey); !bytes.Equal(got, otherVal) {
|
||||
t.Errorf("other stem BasicData: got %x, want %x", got, otherVal)
|
||||
}
|
||||
}
|
||||
|
||||
// TestBintrieCodecFlushDelete verifies that nil-valued entries in the
|
||||
// accountData map clear the corresponding offset, and that clearing every
|
||||
// populated offset at a stem removes the on-disk key entirely (matching
|
||||
// the per-call DeleteStorage semantics tested elsewhere).
|
||||
func TestBintrieCodecFlushDelete(t *testing.T) {
|
||||
codec, db := newTestBintrieCodec(t)
|
||||
|
||||
// Seed: write two offsets at one stem.
|
||||
stem := bytes.Repeat([]byte{0x77}, bintrie.StemSize)
|
||||
v0 := bytes.Repeat([]byte{0x01}, stemBlobValueSize)
|
||||
v1 := bytes.Repeat([]byte{0x02}, stemBlobValueSize)
|
||||
|
||||
mkKey := func(offset byte) common.Hash {
|
||||
var k common.Hash
|
||||
copy(k[:bintrie.StemSize], stem)
|
||||
k[bintrie.StemSize] = offset
|
||||
return k
|
||||
}
|
||||
batch := db.NewBatch()
|
||||
codec.Flush(batch, nil, map[common.Hash][]byte{
|
||||
mkKey(0): v0,
|
||||
mkKey(1): v1,
|
||||
}, nil, nil)
|
||||
flushBatch(t, batch)
|
||||
|
||||
// Now flush a nil for offset 0 — only offset 1 should remain.
|
||||
batch = db.NewBatch()
|
||||
codec.Flush(batch, nil, map[common.Hash][]byte{mkKey(0): nil}, nil, nil)
|
||||
flushBatch(t, batch)
|
||||
|
||||
blob := rawdb.ReadBinTrieStem(db, stem)
|
||||
if got, _ := extractStemOffset(blob, 0); got != nil {
|
||||
t.Errorf("offset 0 should be cleared, got %x", got)
|
||||
}
|
||||
if got, _ := extractStemOffset(blob, 1); !bytes.Equal(got, v1) {
|
||||
t.Errorf("offset 1 should survive, got %x want %x", got, v1)
|
||||
}
|
||||
|
||||
// Clear the last remaining offset; the on-disk key should disappear.
|
||||
batch = db.NewBatch()
|
||||
codec.Flush(batch, nil, map[common.Hash][]byte{mkKey(1): nil}, nil, nil)
|
||||
flushBatch(t, batch)
|
||||
|
||||
if raw := rawdb.ReadBinTrieStem(db, stem); raw != nil {
|
||||
t.Errorf("stem should be deleted, got %x", raw)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,8 +17,6 @@
|
|||
package pathdb
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
||||
"github.com/VictoriaMetrics/fastcache"
|
||||
"github.com/ethereum/go-ethereum/common"
|
||||
"github.com/ethereum/go-ethereum/core/rawdb"
|
||||
|
|
@ -71,71 +69,16 @@ func writeNodes(batch ethdb.Batch, nodes map[common.Hash]map[string]*trienode.No
|
|||
// This function assumes the background generator is already terminated and states
|
||||
// before the supplied marker has been correctly generated.
|
||||
//
|
||||
// The codec parameter abstracts the trie-specific persistence and cache key
|
||||
// derivation. The marker comparisons retain merkle-specific shape (two-tier
|
||||
// account+storage marker) because the bintrie path uses a separate writer
|
||||
// (writeStems, added in a later commit) that operates on a single-tier
|
||||
// marker over stems rather than (account, storage) pairs.
|
||||
// The codec parameter abstracts the trie-specific persistence: merkleFlatCodec
|
||||
// performs a per-entry rawdb write for each accountData/storageData entry,
|
||||
// while bintrieFlatCodec aggregates per-offset writes into per-stem
|
||||
// read-modify-writes. Either way, the genMarker filtering, cache update, and
|
||||
// metric reporting all happen inside the codec — writeStates is just a thin
|
||||
// dispatcher.
|
||||
//
|
||||
// TODO(rjl493456442) do we really need this generation marker? The state updates
|
||||
// after the marker can also be written and will be fixed by generator later if
|
||||
// it's outdated.
|
||||
func writeStates(batch ethdb.Batch, codec flatStateCodec, genMarker []byte, accountData map[common.Hash][]byte, storageData map[common.Hash]map[common.Hash][]byte, clean *fastcache.Cache) (int, int) {
|
||||
var (
|
||||
accounts int
|
||||
slots int
|
||||
)
|
||||
for addrHash, blob := range accountData {
|
||||
// Skip any account not yet covered by the snapshot. The account
|
||||
// at the generation marker position (addrHash == genMarker[:common.HashLength])
|
||||
// should still be updated, as it would be skipped in the next
|
||||
// generation cycle.
|
||||
if genMarker != nil && bytes.Compare(addrHash[:], genMarker) > 0 {
|
||||
continue
|
||||
}
|
||||
accounts += 1
|
||||
cacheKey := codec.AccountCacheKey(addrHash)
|
||||
if len(blob) == 0 {
|
||||
codec.DeleteAccount(batch, addrHash)
|
||||
if clean != nil {
|
||||
clean.Set(cacheKey, nil)
|
||||
}
|
||||
} else {
|
||||
codec.WriteAccount(batch, addrHash, blob)
|
||||
if clean != nil {
|
||||
clean.Set(cacheKey, blob)
|
||||
}
|
||||
}
|
||||
}
|
||||
for addrHash, storages := range storageData {
|
||||
// Skip any account not covered yet by the snapshot
|
||||
if genMarker != nil && bytes.Compare(addrHash[:], genMarker) > 0 {
|
||||
continue
|
||||
}
|
||||
midAccount := genMarker != nil && bytes.Equal(addrHash[:], genMarker[:common.HashLength])
|
||||
|
||||
for storageHash, blob := range storages {
|
||||
// Skip any storage slot not yet covered by the snapshot. The storage slot
|
||||
// at the generation marker position (addrHash == genMarker[:common.HashLength]
|
||||
// and storageHash == genMarker[common.HashLength:]) should still be updated,
|
||||
// as it would be skipped in the next generation cycle.
|
||||
if midAccount && bytes.Compare(storageHash[:], genMarker[common.HashLength:]) > 0 {
|
||||
continue
|
||||
}
|
||||
slots += 1
|
||||
cacheKey := codec.StorageCacheKey(addrHash, storageHash)
|
||||
if len(blob) == 0 {
|
||||
codec.DeleteStorage(batch, addrHash, storageHash)
|
||||
if clean != nil {
|
||||
clean.Set(cacheKey, nil)
|
||||
}
|
||||
} else {
|
||||
codec.WriteStorage(batch, addrHash, storageHash, blob)
|
||||
if clean != nil {
|
||||
clean.Set(cacheKey, blob)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return accounts, slots
|
||||
return codec.Flush(batch, genMarker, accountData, storageData, clean)
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue