From 29ef7576d962ddd837516baebb74a7003c5897b4 Mon Sep 17 00:00:00 2001
From: CPerezz <cperezz19@pm.me>
Date: Tue, 7 Apr 2026 19:57:55 +0200
Subject: [PATCH] core/state: hook leaf production in binaryHasher
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

binaryHasher now implements the new LeafProducer optional extension to
the Hasher interface. Every UpdateAccount, UpdateStorage, and delete
path records the corresponding (stem, offset, value) write into an
internal buffer, which the caller drains once per block via
DrainStemWrites() and hands to the pathdb flat-state layer through the
stateUpdate (wired up in the next commit).

Three kinds of writes are recorded (code-chunk writes made by updateAccount are not mirrored in this commit):

  - Account create/update: two writes (BasicData at offset 0,
    CodeHash at offset 1), sharing the same 31-byte stem. BasicData
    is produced via bintrie.PackBasicData so the flat-state blob
    is bit-identical to what the trie layer packs internally.

  - Storage update: one write per slot. Non-zero values become
    right-justified 32-byte blobs; the zero value (the bintrie's
    "delete" convention) becomes 32 zero bytes, matching the trie's
    tombstone-with-zero semantics so the flat-state mirror stays
    bit-identical to the StemNode.Values entry.

  - Account delete: two clear writes (nil Value) for offsets 0 and 1.
    Storage slots and code chunks at the same or other stems are NOT
    touched; pre-EIP-6780 full-wipe is a documented scope limitation.

LeafProducer is a separate interface declared alongside Hasher and is
strictly opt-in — merkleHasher does not implement it, and callers
detect the capability via a type assertion. This extends the existing
Hasher while keeping its read-side/write-side split intact: hashers
that have a concept of flat-state leaves can expose them; hashers that
don't (MPT) are unaffected.

Tests cover:

  - TestBinaryHasherLeafProduction: account update produces 2 writes
    at offsets 0+1 with matching stem; drain is destructive; storage
    update emits one matching write; zero-value storage writes 32 zero
    bytes; delete emits 2 clear writes.
  - TestMerkleHasherNoLeafProducer: merkleHasher does NOT satisfy the
    LeafProducer interface (the capability is opt-in per hasher).

The collected stem writes are not yet propagated anywhere — a later
commit wires DrainStemWrites into StateDB.IntermediateRoot so the
writes flow through stateUpdate and the pathdb stateSet into the
flat-state layer.
---
 core/state/database_hasher.go             |  49 +++++++++
 core/state/database_hasher_binary.go      | 100 ++++++++++++++++++-
 core/state/database_hasher_binary_test.go | 116 ++++++++++++++++++++++
 3 files changed, 264 insertions(+), 1 deletion(-)

diff --git a/core/state/database_hasher.go b/core/state/database_hasher.go
index b3f056d4d3..cf42d71e1c 100644
--- a/core/state/database_hasher.go
+++ b/core/state/database_hasher.go
@@ -54,6 +54,55 @@ type Hashes struct {
 	Prev common.Hash // Pre-mutation root
 }
 
+// StemWrite describes a single write to a bintrie stem offset. It is used
+// by LeafProducer-capable hashers to report flat-state mutations derived
+// from their trie updates so a downstream flat-state layer can be kept
+// consistent with the hasher's on-trie view.
+//
+// Stem is the 31-byte common prefix of the EIP-7864 tree key. Offset is
+// the index into the stem's 256-value group (0..255). Value is the
+// 32-byte leaf value that was written, or nil to clear the offset; the
+// binary hasher fills it in according to the kind of mutation:
+//   - Account create/update: two writes (BasicData, CodeHash) with
+//     non-nil 32-byte values.
+//   - Storage update to a non-zero value: one write with the 32-byte
+//     normalized value.
+//   - Storage update to zero (the bintrie's "delete" convention): one
+//     write with 32 zero bytes (tombstone / present with zero).
+//   - Account delete: two writes with nil values, signalling the flat
+//     state to clear the corresponding offsets.
+type StemWrite struct {
+	Stem   [31]byte
+	Offset byte
+	Value  []byte
+}
+
+// LeafProducer is an optional extension to Hasher for implementations
+// that track flat-state mutations alongside trie updates. Callers use it
+// to harvest the set of stem writes needed to keep an out-of-band flat
+// state layer consistent with the hasher's trie mutations.
+//
+// The binary hasher implements this interface; the merkle hasher does
+// not, because merkle flat state is MPT-shaped and does not use stems.
+// Callers check via a type assertion:
+//
+//	if lp, ok := h.(LeafProducer); ok {
+//	    writes := lp.DrainStemWrites()
+//	    // ... propagate writes into the state update ...
+//	}
+//
+// DrainStemWrites is intended to be called ONCE per block, AFTER all
+// UpdateAccount/UpdateStorage calls for that block have completed. The
+// implementation must reset its internal buffer on drain so subsequent
+// calls return only writes accumulated since the last drain.
+type LeafProducer interface {
+	// DrainStemWrites returns all stem writes accumulated since the last
+	// drain, in production order, and resets the internal buffer. The
+	// result is owned by the caller (the hasher starts a fresh slice on
+	// the next update) and is empty when nothing was recorded.
+	DrainStemWrites() []StemWrite
+}
+
 // Hasher defines the minimal interface for computing state root hashes.
 //
 // It abstracts over different trie implementations, such as the traditional
diff --git a/core/state/database_hasher_binary.go b/core/state/database_hasher_binary.go
index 89850b377f..355f56b25d 100644
--- a/core/state/database_hasher_binary.go
+++ b/core/state/database_hasher_binary.go
@@ -126,14 +126,31 @@ func (tr *warpBinTrie) copy() *warpBinTrie {
 // binaryHasher is a Hasher implementation backed by a unified single-layer
 // binary trie. Accounts, storage slots, and contract code all reside in one
 // trie, keyed according to the EIP-7864 address space layout.
+//
+// binaryHasher also implements LeafProducer: alongside each account and
+// storage mutation it records the matching (stem, offset, value) write in
+// an internal buffer (code-chunk writes are not recorded). The caller
+// (StateDB, wired up in a later commit) drains the buffer once per block
+// and passes the writes to the pathdb flat-state layer via stateUpdate, so
+// that layer mirrors the trie without deriving bintrie keys itself.
 type binaryHasher struct {
 	db   *triedb.Database
 	root common.Hash
 
 	prefetch bool
 	trie     *warpBinTrie
+
+	// leaves buffers flat-state writes produced as a side-effect of
+	// UpdateAccount/UpdateStorage/deleteAccount. It is cleared by
+	// DrainStemWrites. Direct reads and writes to this slice are only
+	// safe from the single goroutine that owns the hasher; the Hasher
+	// interface already requires single-threaded use per block.
+	leaves []StemWrite
 }
 
+// Compile-time assertion that binaryHasher implements LeafProducer.
+var _ LeafProducer = (*binaryHasher)(nil)
+
 func newBinaryHasher(root common.Hash, db *triedb.Database, prefetch bool, prefetchRead bool) (*binaryHasher, error) {
 	tr, err := newWrapBinTrie(root, db, prefetch, prefetchRead)
 	if err != nil {
@@ -147,8 +164,58 @@ func newBinaryHasher(root common.Hash, db *triedb.Database, prefetch bool, prefe
 	}, nil
 }
 
+// DrainStemWrites implements LeafProducer. It hands the buffered stem
+// writes to the caller, who then owns the slice, and sets the buffer to
+// nil so the next recorded write allocates a fresh backing array. The
+// result is nil when nothing has been recorded since the last drain.
+func (h *binaryHasher) DrainStemWrites() []StemWrite {
+	out := h.leaves
+	h.leaves = nil
+	return out
+}
+
+// recordLeaf appends a single stem write to the internal buffer. The
+// stem is the first bintrie.StemSize bytes of fullKey and the offset is
+// the byte right after it, so fullKey must be a full tree key. A nil
+// value is recorded as nil (a flat-state clear, as used by account
+// deletion); any other value is copied so later caller edits can't leak.
+func (h *binaryHasher) recordLeaf(fullKey []byte, value []byte) {
+	var w StemWrite
+	copy(w.Stem[:], fullKey[:bintrie.StemSize])
+	w.Offset = fullKey[bintrie.StemSize]
+	if value != nil {
+		w.Value = make([]byte, len(value))
+		copy(w.Value, value)
+	}
+	h.leaves = append(h.leaves, w)
+}
+
 // deleteAccount removes the account specified by the address from the state.
+//
+// In addition to the trie mutation, this records two "clear" stem writes
+// (one for BasicData at offset 0 and one for CodeHash at offset 1) so
+// the flat-state mirror can drop the matching entries.
+//
+// Note: BinaryTrie.DeleteAccount is currently a no-op upstream
+// (tracked as a standalone bugfix PR against ethereum/go-ethereum).
+// Until that fix lands the on-trie deletion does nothing, but the
+// flat-state mirror will still drop its copy — a minor temporary
+// inconsistency scoped to the account-delete path. Once the trie fix
+// lands the two sides converge.
+//
+// Storage slots and code chunks at the same or other stems are NOT
+// touched by this function; callers that need a full account wipe must
+// walk storage explicitly. Pre-EIP-6780 self-destruct wipe is a
+// documented scope limitation.
 func (h *binaryHasher) deleteAccount(addr common.Address) error {
+	// Record the flat-state clears BEFORE the trie call: they stay
+	// buffered even if the trie call fails. NOTE(review): updateAccount
+	// and UpdateStorage record only after success — confirm intent.
+	basicDataKey := bintrie.GetBinaryTreeKeyBasicData(addr)
+	codeHashKey := bintrie.GetBinaryTreeKeyCodeHash(addr)
+	h.recordLeaf(basicDataKey, nil) // nil → clear the flat-state offset
+	h.recordLeaf(codeHashKey, nil)
+
 	return h.trie.DeleteAccount(addr)
 }
 
@@ -174,6 +241,19 @@ func (h *binaryHasher) updateAccount(addr common.Address, account AccountMut) er
 	if err := h.trie.UpdateAccount(addr, data, account.CodeSize); err != nil {
 		return err
 	}
+	// Record the two flat-state writes that correspond to the on-trie
+	// BasicData (offset 0) and CodeHash (offset 1) at the account's
+	// stem. PackBasicData produces the same 32-byte blob that the trie
+	// layer packs internally, so the flat-state mirror encodes
+	// bit-identically.
+	basicData := bintrie.PackBasicData(data.Nonce, data.Balance, account.CodeSize)
+	h.recordLeaf(bintrie.GetBinaryTreeKeyBasicData(addr), basicData[:])
+
+	// CodeHash is a 32-byte value written straight into offset 1.
+	// EOAs store types.EmptyCodeHash here (a known non-zero hash) so
+	// the flat-state offset is always set after any non-delete update.
+	h.recordLeaf(bintrie.GetBinaryTreeKeyCodeHash(addr), data.CodeHash)
+
 	// Write chunked code into the trie when dirty.
 	if account.Code != nil && len(account.Code.Code) > 0 {
 		codeHash := common.BytesToHash(account.Account.CodeHash)
@@ -205,17 +285,35 @@ func (h *binaryHasher) UpdateAccount(addresses []common.Address, accounts []Acco
 // UpdateStorage implements Hasher, writing a list of storage slot mutations
 // into the state. This function must be invoked first before writing the
 // associated account metadata into the state.
+//
+// Each mutation is also recorded as a flat-state stem write. A zero value
+// is the bintrie's "delete" convention: the trie writes 32 zero bytes at
+// the slot, and the flat-state mirror does the same (a present-with-zero
+// tombstone) rather than removing the offset from its bitmap. This keeps
+// the trie and flat-state views bit-identical for the slot.
 func (h *binaryHasher) UpdateStorage(address common.Address, keys []common.Hash, values []common.Hash) error {
 	var err error
 	for i, key := range keys {
+		// blob is the 32-byte value mirrored into the flat state. It
+		// starts zeroed and is only filled for non-zero values (a
+		// common.Hash is already 32 bytes, so no padding is needed);
+		// in the zero-value case it stays all-zero, matching the
+		// trie's tombstone convention.
+		var blob [bintrie.HashSize]byte
 		if values[i] == (common.Hash{}) {
 			err = h.trie.DeleteStorage(address, key[:])
 		} else {
-			err = h.trie.UpdateStorage(address, key[:], values[i][:])
+			copy(blob[:], values[i][:])
+			err = h.trie.UpdateStorage(address, key[:], blob[:])
 		}
 		if err != nil {
 			return err
 		}
+		// Record the flat-state mirror write regardless of zero/non-zero:
+		// the blob is 32 zero bytes in the delete case and the value in
+		// the non-delete case.
+		storageKey := bintrie.GetBinaryTreeKeyStorageSlot(address, key[:])
+		h.recordLeaf(storageKey, blob[:])
 	}
 	return nil
 }
diff --git a/core/state/database_hasher_binary_test.go b/core/state/database_hasher_binary_test.go
index 13fe28fc75..2be3d773a4 100644
--- a/core/state/database_hasher_binary_test.go
+++ b/core/state/database_hasher_binary_test.go
@@ -17,12 +17,14 @@
 package state
 
 import (
+	"bytes"
 	"testing"
 
 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/core/rawdb"
 	"github.com/ethereum/go-ethereum/core/stateless"
 	"github.com/ethereum/go-ethereum/core/types"
+	"github.com/ethereum/go-ethereum/trie/bintrie"
 	"github.com/ethereum/go-ethereum/triedb"
 )
 
@@ -266,3 +268,117 @@ func TestBinaryHasherWitness(t *testing.T) {
 		t.Fatalf("read-only prefetching should add extra nodes to witness: got %d (with read) vs %d (without)", nodesWithRead, nodesWithoutRead)
 	}
 }
+
+// TestBinaryHasherLeafProduction verifies that binaryHasher implements
+// LeafProducer and reports the expected stem writes for account update,
+// storage update (non-zero and zero values) and account delete, and
+// that DrainStemWrites empties the buffer.
+func TestBinaryHasherLeafProduction(t *testing.T) {
+	db := triedb.NewDatabase(rawdb.NewMemoryDatabase(), triedb.VerkleDefaults)
+	h := newTestBinaryHasher(t, db, types.EmptyBinaryHash, hasherTestConfig{"leaf", false, false})
+
+	// Type assertion: binaryHasher must satisfy LeafProducer.
+	lp, ok := Hasher(h).(LeafProducer)
+	if !ok {
+		t.Fatal("binaryHasher should implement LeafProducer")
+	}
+
+	// --- Account update: expect two writes (BasicData + CodeHash) ---
+	if err := h.UpdateAccount(
+		[]common.Address{hasherAddr1},
+		[]AccountMut{hasherAccount(1, 100)},
+	); err != nil {
+		t.Fatalf("UpdateAccount: %v", err)
+	}
+	writes := lp.DrainStemWrites()
+	if len(writes) != 2 {
+		t.Fatalf("UpdateAccount: got %d stem writes, want 2 (BasicData + CodeHash)", len(writes))
+	}
+	// Offsets 0 and 1 respectively, and the BasicData stem matches the
+	// CodeHash stem (same address → same 31-byte stem).
+	if writes[0].Offset != bintrie.BasicDataLeafKey {
+		t.Errorf("write[0].Offset = %d, want %d (BasicDataLeafKey)", writes[0].Offset, bintrie.BasicDataLeafKey)
+	}
+	if writes[1].Offset != bintrie.CodeHashLeafKey {
+		t.Errorf("write[1].Offset = %d, want %d (CodeHashLeafKey)", writes[1].Offset, bintrie.CodeHashLeafKey)
+	}
+	if writes[0].Stem != writes[1].Stem {
+		t.Errorf("stems differ: %x vs %x", writes[0].Stem, writes[1].Stem)
+	}
+	if len(writes[0].Value) != 32 {
+		t.Errorf("write[0].Value length = %d, want 32", len(writes[0].Value))
+	}
+	if len(writes[1].Value) != 32 {
+		t.Errorf("write[1].Value length = %d, want 32", len(writes[1].Value))
+	}
+	// The code hash leaf should be the empty-code hash (non-zero).
+	if !bytes.Equal(writes[1].Value, types.EmptyCodeHash.Bytes()) {
+		t.Errorf("write[1].Value = %x, want empty code hash %x", writes[1].Value, types.EmptyCodeHash.Bytes())
+	}
+
+	// --- Drain again: should be empty (drain is destructive) ---
+	if again := lp.DrainStemWrites(); len(again) != 0 {
+		t.Fatalf("second drain should be empty, got %d writes", len(again))
+	}
+
+	// --- Storage update: non-zero value produces one write ---
+	if err := h.UpdateStorage(hasherAddr1, []common.Hash{hasherSlot1}, []common.Hash{hasherVal1}); err != nil {
+		t.Fatalf("UpdateStorage: %v", err)
+	}
+	writes = lp.DrainStemWrites()
+	if len(writes) != 1 {
+		t.Fatalf("UpdateStorage: got %d writes, want 1", len(writes))
+	}
+	// The recorded value should match hasherVal1 (a common.Hash), which
+	// is already 32 bytes wide.
+	if !bytes.Equal(writes[0].Value, hasherVal1[:]) {
+		t.Errorf("UpdateStorage value: got %x, want %x", writes[0].Value, hasherVal1)
+	}
+
+	// --- Storage "delete" (zero value): one write with 32 zero bytes ---
+	if err := h.UpdateStorage(hasherAddr1, []common.Hash{hasherSlot1}, []common.Hash{{}}); err != nil {
+		t.Fatalf("UpdateStorage (zero): %v", err)
+	}
+	writes = lp.DrainStemWrites()
+	if len(writes) != 1 {
+		t.Fatalf("UpdateStorage (zero): got %d writes, want 1", len(writes))
+	}
+	var zeros [32]byte
+	if !bytes.Equal(writes[0].Value, zeros[:]) {
+		t.Errorf("zero-value storage write should record 32 zero bytes, got %x", writes[0].Value)
+	}
+
+	// --- Account delete: two writes with nil values ---
+	if err := h.UpdateAccount(
+		[]common.Address{hasherAddr1},
+		[]AccountMut{{Account: nil}},
+	); err != nil {
+		t.Fatalf("UpdateAccount delete: %v", err)
+	}
+	writes = lp.DrainStemWrites()
+	if len(writes) != 2 {
+		t.Fatalf("delete: got %d writes, want 2 (BasicData + CodeHash clear)", len(writes))
+	}
+	for i, w := range writes {
+		if w.Value != nil {
+			t.Errorf("delete write[%d] should have nil Value (clear), got %x", i, w.Value)
+		}
+	}
+	if writes[0].Offset != bintrie.BasicDataLeafKey || writes[1].Offset != bintrie.CodeHashLeafKey {
+		t.Errorf("delete offsets: got %d,%d, want %d,%d", writes[0].Offset, writes[1].Offset, bintrie.BasicDataLeafKey, bintrie.CodeHashLeafKey)
+	}
+}
+
+// TestMerkleHasherNoLeafProducer checks that a merkleHasher, viewed as
+// a Hasher, fails the LeafProducer type assertion: the interface is
+// opt-in and the MPT path has no concept of stem writes.
+func TestMerkleHasherNoLeafProducer(t *testing.T) {
+	db := triedb.NewDatabase(rawdb.NewMemoryDatabase(), nil)
+	h, err := newMerkleHasher(types.EmptyRootHash, db, false, false)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if _, ok := Hasher(h).(LeafProducer); ok {
+		t.Fatal("merkleHasher should NOT implement LeafProducer")
+	}
+}