diff --git a/core/state/database_hasher.go b/core/state/database_hasher.go
index b3f056d4d3..cf42d71e1c 100644
--- a/core/state/database_hasher.go
+++ b/core/state/database_hasher.go
@@ -54,6 +54,55 @@ type Hashes struct {
 	Prev common.Hash // Pre-mutation root
 }
 
+// StemWrite describes a single write to a bintrie stem offset. It is used
+// by LeafProducer-capable hashers to report flat-state mutations derived
+// from their trie updates so a downstream flat-state layer can be kept
+// consistent with the hasher's on-trie view.
+//
+// Stem is the 31-byte common prefix of the EIP-7864 tree key. Offset is
+// the index into the stem's 256-value group (0..255). Value is the
+// 32-byte leaf value that was written; the caller uses the per-call
+// policy documented on the binary hasher:
+//   - Account create/update: two writes (BasicData, CodeHash) with
+//     non-nil 32-byte values.
+//   - Storage update to a non-zero value: one write with the 32-byte
+//     normalized value.
+//   - Storage update to zero (the bintrie's "delete" convention): one
+//     write with 32 zero bytes (tombstone / present with zero).
+//   - Account delete: two writes with nil values, signalling the flat
+//     state to clear the corresponding offsets.
+type StemWrite struct {
+	Stem   [31]byte
+	Offset byte
+	Value  []byte
+}
+
+// LeafProducer is an optional extension to Hasher for implementations
+// that track flat-state mutations alongside trie updates. Callers use it
+// to harvest the set of stem writes needed to keep an out-of-band flat
+// state layer consistent with the hasher's trie mutations.
+//
+// The binary hasher implements this interface; the merkle hasher does
+// not, because merkle flat state is MPT-shaped and does not use stems.
+// Callers check via a type assertion:
+//
+//	if lp, ok := h.(LeafProducer); ok {
+//		writes := lp.DrainStemWrites()
+//		// ... propagate writes into the state update ...
+//	}
+//
+// DrainStemWrites is intended to be called ONCE per block, AFTER all
+// UpdateAccount/UpdateStorage calls for that block have completed. The
+// implementation must reset its internal buffer on drain so subsequent
+// calls return only writes accumulated since the last drain.
+type LeafProducer interface {
+	// DrainStemWrites returns all stem writes accumulated since the last
+	// drain, in the order they were produced, and resets the internal
+	// buffer. The returned slice is owned by the caller; the hasher
+	// allocates a fresh slice on the next update.
+	DrainStemWrites() []StemWrite
+}
+
 // Hasher defines the minimal interface for computing state root hashes.
 //
 // It abstracts over different trie implementations, such as the traditional
diff --git a/core/state/database_hasher_binary.go b/core/state/database_hasher_binary.go
index 89850b377f..355f56b25d 100644
--- a/core/state/database_hasher_binary.go
+++ b/core/state/database_hasher_binary.go
@@ -126,14 +126,31 @@ func (tr *warpBinTrie) copy() *warpBinTrie {
 // binaryHasher is a Hasher implementation backed by a unified single-layer
 // binary trie. Accounts, storage slots, and contract code all reside in one
 // trie, keyed according to the EIP-7864 address space layout.
+//
+// binaryHasher also implements LeafProducer: alongside every trie mutation
+// it records the corresponding (stem, offset, value) write into an
+// internal buffer. The caller (StateDB.Commit in a later commit) drains
+// this buffer once per block and hands the writes to the pathdb flat-state
+// layer via the stateUpdate, keeping the bintrie trie and its flat-state
+// mirror consistent without recomputing the bintrie key derivation twice.
 type binaryHasher struct {
 	db       *triedb.Database
 	root     common.Hash
 	prefetch bool
 
 	trie *warpBinTrie
+
+	// leaves buffers flat-state writes produced as a side-effect of
+	// UpdateAccount/UpdateStorage/deleteAccount. It is cleared by
+	// DrainStemWrites. Direct reads and writes to this slice are only
+	// safe from the single goroutine that owns the hasher; the Hasher
+	// interface already requires single-threaded use per block.
+	leaves []StemWrite
 }
 
+// Compile-time assertion that binaryHasher implements LeafProducer.
+var _ LeafProducer = (*binaryHasher)(nil)
+
 func newBinaryHasher(root common.Hash, db *triedb.Database, prefetch bool, prefetchRead bool) (*binaryHasher, error) {
 	tr, err := newWrapBinTrie(root, db, prefetch, prefetchRead)
 	if err != nil {
@@ -147,8 +164,58 @@ func newBinaryHasher(root common.Hash, db *triedb.Database, prefetch bool, prefe
 	}, nil
 }
 
+// DrainStemWrites implements LeafProducer. It returns the buffered stem
+// writes accumulated since the last drain and resets the buffer. The
+// returned slice is owned by the caller; the hasher allocates a fresh
+// backing array on the next update.
+func (h *binaryHasher) DrainStemWrites() []StemWrite {
+	out := h.leaves
+	h.leaves = nil
+	return out
+}
+
+// recordLeaf appends a single stem write to the internal buffer. The
+// stem is taken from the first 31 bytes of the supplied 32-byte tree
+// key, and the offset is the last byte. Value may be nil (for clearing
+// a slot in the flat state, matching account deletion) or a 32-byte
+// slice (for writes).
+func (h *binaryHasher) recordLeaf(fullKey []byte, value []byte) {
+	var w StemWrite
+	copy(w.Stem[:], fullKey[:bintrie.StemSize])
+	w.Offset = fullKey[bintrie.StemSize]
+	if value != nil {
+		w.Value = make([]byte, len(value))
+		copy(w.Value, value)
+	}
+	h.leaves = append(h.leaves, w)
+}
+
 // deleteAccount removes the account specified by the address from the state.
+//
+// In addition to the trie mutation, this records two "clear" stem writes
+// (one for BasicData at offset 0 and one for CodeHash at offset 1) so
+// the flat-state mirror can drop the matching entries.
+//
+// Note: BinaryTrie.DeleteAccount is currently a no-op upstream
+// (tracked as a standalone bugfix PR against ethereum/go-ethereum).
+// Until that fix lands the on-trie deletion does nothing, but the
+// flat-state mirror will still drop its copy — a minor temporary
+// inconsistency scoped to the account-delete path. Once the trie fix
+// lands the two sides converge.
+//
+// Storage slots and code chunks at the same or other stems are NOT
+// touched by this function; callers that need a full account wipe must
+// walk storage explicitly. Pre-EIP-6780 self-destruct wipe is a
+// documented scope limitation.
 func (h *binaryHasher) deleteAccount(addr common.Address) error {
-	return h.trie.DeleteAccount(addr)
+	// Mutate the trie first and record the flat-state clears only after
+	// it succeeds, matching updateAccount and UpdateStorage: a failed
+	// deletion must not leave phantom stem writes in the buffer.
+	if err := h.trie.DeleteAccount(addr); err != nil {
+		return err
+	}
+	h.recordLeaf(bintrie.GetBinaryTreeKeyBasicData(addr), nil) // nil → clear the flat-state offset
+	h.recordLeaf(bintrie.GetBinaryTreeKeyCodeHash(addr), nil)
+	return nil
 }
 
@@ -174,6 +241,19 @@ func (h *binaryHasher) updateAccount(addr common.Address, account AccountMut) er
 	if err := h.trie.UpdateAccount(addr, data, account.CodeSize); err != nil {
 		return err
 	}
+	// Record the two flat-state writes that correspond to the on-trie
+	// BasicData (offset 0) and CodeHash (offset 1) at the account's
+	// stem. PackBasicData produces the same 32-byte blob that the trie
+	// layer packs internally, so the flat-state mirror encodes
+	// bit-identically.
+	basicData := bintrie.PackBasicData(data.Nonce, data.Balance, account.CodeSize)
+	h.recordLeaf(bintrie.GetBinaryTreeKeyBasicData(addr), basicData[:])
+
+	// CodeHash is a 32-byte value written straight into offset 1.
+	// EOAs store types.EmptyCodeHash here (a known non-zero hash) so
+	// the flat-state offset is always set after any non-delete update.
+	h.recordLeaf(bintrie.GetBinaryTreeKeyCodeHash(addr), data.CodeHash)
+
 	// Write chunked code into the trie when dirty.
 	if account.Code != nil && len(account.Code.Code) > 0 {
 		codeHash := common.BytesToHash(account.Account.CodeHash)
@@ -205,17 +285,35 @@ func (h *binaryHasher) UpdateAccount(addresses []common.Address, accounts []Acco
 // UpdateStorage implements Hasher, writing a list of storage slot mutations
 // into the state. This function must be invoked first before writing the
 // associated account metadata into the state.
+//
+// Each mutation is also recorded as a flat-state stem write. A zero value
+// is the bintrie's "delete" convention: the trie writes 32 zero bytes at
+// the slot, and the flat-state mirror does the same (a present-with-zero
+// tombstone) rather than removing the offset from its bitmap. This keeps
+// the trie and flat-state views bit-identical for the slot.
 func (h *binaryHasher) UpdateStorage(address common.Address, keys []common.Hash, values []common.Hash) error {
 	var err error
 	for i, key := range keys {
+		// BinaryTrie.UpdateStorage right-justifies a shorter input into
+		// 32 bytes; for a non-zero common.Hash the input is already 32
+		// bytes so the normalization is a no-op. For the zero-value
+		// case we emit 32 zero bytes explicitly to match the trie's
+		// tombstone convention.
+		var blob [bintrie.HashSize]byte
 		if values[i] == (common.Hash{}) {
 			err = h.trie.DeleteStorage(address, key[:])
 		} else {
-			err = h.trie.UpdateStorage(address, key[:], values[i][:])
+			copy(blob[:], values[i][:])
+			err = h.trie.UpdateStorage(address, key[:], blob[:])
 		}
 		if err != nil {
 			return err
 		}
+		// Record the flat-state mirror write regardless of zero/non-zero:
+		// the blob is 32 zero bytes in the delete case and the value in
+		// the non-delete case.
+		storageKey := bintrie.GetBinaryTreeKeyStorageSlot(address, key[:])
+		h.recordLeaf(storageKey, blob[:])
 	}
 	return nil
 }
diff --git a/core/state/database_hasher_binary_test.go b/core/state/database_hasher_binary_test.go
index 13fe28fc75..2be3d773a4 100644
--- a/core/state/database_hasher_binary_test.go
+++ b/core/state/database_hasher_binary_test.go
@@ -17,12 +17,14 @@
 package state
 
 import (
+	"bytes"
 	"testing"
 
 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/core/rawdb"
 	"github.com/ethereum/go-ethereum/core/stateless"
 	"github.com/ethereum/go-ethereum/core/types"
+	"github.com/ethereum/go-ethereum/trie/bintrie"
 	"github.com/ethereum/go-ethereum/triedb"
 )
 
@@ -266,3 +268,117 @@ func TestBinaryHasherWitness(t *testing.T) {
 		t.Fatalf("read-only prefetching should add extra nodes to witness: got %d (with read) vs %d (without)", nodesWithRead, nodesWithoutRead)
 	}
 }
+
+// TestBinaryHasherLeafProduction verifies that binaryHasher implements
+// LeafProducer and reports stem writes corresponding to each trie
+// mutation. Covers the three mutation kinds the hasher performs:
+// account update, storage update, and account delete.
+func TestBinaryHasherLeafProduction(t *testing.T) {
+	db := triedb.NewDatabase(rawdb.NewMemoryDatabase(), triedb.VerkleDefaults)
+	h := newTestBinaryHasher(t, db, types.EmptyBinaryHash, hasherTestConfig{"leaf", false, false})
+
+	// Type assertion: binaryHasher must satisfy LeafProducer.
+	lp, ok := Hasher(h).(LeafProducer)
+	if !ok {
+		t.Fatal("binaryHasher should implement LeafProducer")
+	}
+
+	// --- Account update: expect two writes (BasicData + CodeHash) ---
+	if err := h.UpdateAccount(
+		[]common.Address{hasherAddr1},
+		[]AccountMut{hasherAccount(1, 100)},
+	); err != nil {
+		t.Fatalf("UpdateAccount: %v", err)
+	}
+	writes := lp.DrainStemWrites()
+	if len(writes) != 2 {
+		t.Fatalf("UpdateAccount: got %d stem writes, want 2 (BasicData + CodeHash)", len(writes))
+	}
+	// Offsets 0 and 1 respectively, and the BasicData stem matches the
+	// CodeHash stem (same address → same 31-byte stem).
+	if writes[0].Offset != bintrie.BasicDataLeafKey {
+		t.Errorf("write[0].Offset = %d, want %d (BasicDataLeafKey)", writes[0].Offset, bintrie.BasicDataLeafKey)
+	}
+	if writes[1].Offset != bintrie.CodeHashLeafKey {
+		t.Errorf("write[1].Offset = %d, want %d (CodeHashLeafKey)", writes[1].Offset, bintrie.CodeHashLeafKey)
+	}
+	if writes[0].Stem != writes[1].Stem {
+		t.Errorf("stems differ: %x vs %x", writes[0].Stem, writes[1].Stem)
+	}
+	if len(writes[0].Value) != 32 {
+		t.Errorf("write[0].Value length = %d, want 32", len(writes[0].Value))
+	}
+	if len(writes[1].Value) != 32 {
+		t.Errorf("write[1].Value length = %d, want 32", len(writes[1].Value))
+	}
+	// The code hash leaf should be the empty-code hash (non-zero).
+	if !bytes.Equal(writes[1].Value, types.EmptyCodeHash.Bytes()) {
+		t.Errorf("write[1].Value = %x, want empty code hash %x", writes[1].Value, types.EmptyCodeHash.Bytes())
+	}
+
+	// --- Drain again: should be empty (drain is destructive) ---
+	if again := lp.DrainStemWrites(); len(again) != 0 {
+		t.Fatalf("second drain should be empty, got %d writes", len(again))
+	}
+
+	// --- Storage update: non-zero value produces one write ---
+	if err := h.UpdateStorage(hasherAddr1, []common.Hash{hasherSlot1}, []common.Hash{hasherVal1}); err != nil {
+		t.Fatalf("UpdateStorage: %v", err)
+	}
+	writes = lp.DrainStemWrites()
+	if len(writes) != 1 {
+		t.Fatalf("UpdateStorage: got %d writes, want 1", len(writes))
+	}
+	// The recorded value should match hasherVal1 (a common.Hash), which
+	// is already 32 bytes wide.
+	if !bytes.Equal(writes[0].Value, hasherVal1[:]) {
+		t.Errorf("UpdateStorage value: got %x, want %x", writes[0].Value, hasherVal1)
+	}
+
+	// --- Storage "delete" (zero value): one write with 32 zero bytes ---
+	if err := h.UpdateStorage(hasherAddr1, []common.Hash{hasherSlot1}, []common.Hash{{}}); err != nil {
+		t.Fatalf("UpdateStorage (zero): %v", err)
+	}
+	writes = lp.DrainStemWrites()
+	if len(writes) != 1 {
+		t.Fatalf("UpdateStorage (zero): got %d writes, want 1", len(writes))
+	}
+	var zeros [32]byte
+	if !bytes.Equal(writes[0].Value, zeros[:]) {
+		t.Errorf("zero-value storage write should record 32 zero bytes, got %x", writes[0].Value)
+	}
+
+	// --- Account delete: two writes with nil values ---
+	if err := h.UpdateAccount(
+		[]common.Address{hasherAddr1},
+		[]AccountMut{{Account: nil}},
+	); err != nil {
+		t.Fatalf("UpdateAccount delete: %v", err)
+	}
+	writes = lp.DrainStemWrites()
+	if len(writes) != 2 {
+		t.Fatalf("delete: got %d writes, want 2 (BasicData + CodeHash clear)", len(writes))
+	}
+	for i, w := range writes {
+		if w.Value != nil {
+			t.Errorf("delete write[%d] should have nil Value (clear), got %x", i, w.Value)
+		}
+	}
+	if writes[0].Offset != bintrie.BasicDataLeafKey || writes[1].Offset != bintrie.CodeHashLeafKey {
+		t.Errorf("delete offsets: got %d,%d, want %d,%d", writes[0].Offset, writes[1].Offset, bintrie.BasicDataLeafKey, bintrie.CodeHashLeafKey)
+	}
+}
+
+// TestMerkleHasherNoLeafProducer verifies that merkleHasher does NOT
+// implement LeafProducer — the interface is strictly opt-in and the MPT
+// path has no concept of stem writes.
+func TestMerkleHasherNoLeafProducer(t *testing.T) {
+	db := triedb.NewDatabase(rawdb.NewMemoryDatabase(), nil)
+	h, err := newMerkleHasher(types.EmptyRootHash, db, false, false)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if _, ok := Hasher(h).(LeafProducer); ok {
+		t.Fatal("merkleHasher should NOT implement LeafProducer")
+	}
+}