mirror of
https://github.com/ethereum/go-ethereum.git
synced 2026-06-12 01:41:36 +00:00
trie/bintrie: skip clean nodes in CollectNodes to reduce commit write amplification
BinaryTrie.Commit unconditionally walked every resolved in-memory node
and flushed it into the NodeSet, producing one Pebble write per resolved
internal + stem node on every block — even when the node's on-disk blob
was bitwise identical to the previous commit. On a warm 400M-state
workload this meant tens of thousands of redundant 65-byte writes per
block, compounding Pebble compaction pressure on every commit.
The existing mustRecompute flag tracks hash staleness, not disk-blob
staleness: after Hash() completes, mustRecompute is cleared even though
the fresh blob has not been persisted. It is therefore insufficient for
a skip-flush optimization.
This change mirrors MPT's committer pattern (trie/committer.go:51-56)
by adding a dirty flag on InternalNode and StemNode with the semantics
"the on-disk blob is stale". The flag is:
- set to true wherever the node is created or structurally modified
(the same call sites that already set mustRecompute = true),
- set to false only after the node has been passed to the flushfn
inside CollectNodes,
- left false on nodes produced by DeserializeNodeWithHash, matching
the "loaded from disk, already persisted" semantics.
CollectNodes short-circuits on !dirty subtrees; the propagation
invariant (an ancestor of any dirty node is itself dirty) is already
maintained by the existing InsertValuesAtStem / Insert paths, which now
mirror every mustRecompute = true setter with a dirty = true setter.
Serialization format, hash computation, state root, and the pathdb
write path are untouched. Empty NodeSets are already tolerated by
triedb/pathdb.writeNodes.
BenchmarkCollectNodes_SparseWrite (10,000-stem trie, one-leaf
modification + Commit per iteration, Apple M4 Pro):
before 12,653,000 ns/op 107,224,740 B/op 80,953 allocs/op
after 7,336 ns/op 37,774 B/op 134 allocs/op
speedup: ~1,725x memory: ~2,839x less allocs: ~604x fewer
End-to-end impact on a benchmarked geth build depends on workload;
the new TestBinaryTrieCommitIncremental provides a structural
regression guard.
This commit is contained in:
parent
573d94013c
commit
fad11d5795
6 changed files with 248 additions and 9 deletions
|
|
@ -93,15 +93,15 @@ var invalidSerializedLength = errors.New("invalid serialized node length")
|
|||
// DeserializeNode deserializes a binary trie node from a byte slice. The
|
||||
// hash will be recomputed from the deserialized data.
|
||||
func DeserializeNode(serialized []byte, depth int) (BinaryNode, error) {
|
||||
return deserializeNode(serialized, depth, common.Hash{}, true)
|
||||
return deserializeNode(serialized, depth, common.Hash{}, true, true)
|
||||
}
|
||||
|
||||
// DeserializeNodeWithHash deserializes a binary trie node from a byte slice, using the provided hash.
|
||||
func DeserializeNodeWithHash(serialized []byte, depth int, hn common.Hash) (BinaryNode, error) {
|
||||
return deserializeNode(serialized, depth, hn, false)
|
||||
return deserializeNode(serialized, depth, hn, false, false)
|
||||
}
|
||||
|
||||
func deserializeNode(serialized []byte, depth int, hn common.Hash, mustRecompute bool) (BinaryNode, error) {
|
||||
func deserializeNode(serialized []byte, depth int, hn common.Hash, mustRecompute, dirty bool) (BinaryNode, error) {
|
||||
if len(serialized) == 0 {
|
||||
return Empty{}, nil
|
||||
}
|
||||
|
|
@ -117,6 +117,7 @@ func deserializeNode(serialized []byte, depth int, hn common.Hash, mustRecompute
|
|||
right: HashedNode(common.BytesToHash(serialized[33:65])),
|
||||
hash: hn,
|
||||
mustRecompute: mustRecompute,
|
||||
dirty: dirty,
|
||||
}, nil
|
||||
case nodeTypeStem:
|
||||
if len(serialized) < 64 {
|
||||
|
|
@ -141,6 +142,7 @@ func deserializeNode(serialized []byte, depth int, hn common.Hash, mustRecompute
|
|||
depth: depth,
|
||||
hash: hn,
|
||||
mustRecompute: mustRecompute,
|
||||
dirty: dirty,
|
||||
}, nil
|
||||
default:
|
||||
return nil, errors.New("invalid node type")
|
||||
|
|
|
|||
|
|
@ -62,6 +62,7 @@ type InternalNode struct {
|
|||
depth int
|
||||
|
||||
mustRecompute bool // true if the hash needs to be recomputed
|
||||
dirty bool // true if the node's on-disk blob is stale (needs flush)
|
||||
hash common.Hash // cached hash when mustRecompute == false
|
||||
}
|
||||
|
||||
|
|
@ -135,6 +136,7 @@ func (bt *InternalNode) Copy() BinaryNode {
|
|||
right: bt.right.Copy(),
|
||||
depth: bt.depth,
|
||||
mustRecompute: bt.mustRecompute,
|
||||
dirty: bt.dirty,
|
||||
hash: bt.hash,
|
||||
}
|
||||
}
|
||||
|
|
@ -213,6 +215,7 @@ func (bt *InternalNode) InsertValuesAtStem(stem []byte, values [][]byte, resolve
|
|||
|
||||
bt.left, err = bt.left.InsertValuesAtStem(stem, values, resolver, depth+1)
|
||||
bt.mustRecompute = true
|
||||
bt.dirty = true
|
||||
return bt, err
|
||||
}
|
||||
|
||||
|
|
@ -238,12 +241,17 @@ func (bt *InternalNode) InsertValuesAtStem(stem []byte, values [][]byte, resolve
|
|||
|
||||
bt.right, err = bt.right.InsertValuesAtStem(stem, values, resolver, depth+1)
|
||||
bt.mustRecompute = true
|
||||
bt.dirty = true
|
||||
return bt, err
|
||||
}
|
||||
|
||||
// CollectNodes collects all child nodes at a given path, and flushes it
|
||||
// into the provided node collector.
|
||||
// into the provided node collector. Clean subtrees (dirty == false) are
|
||||
// skipped.
|
||||
func (bt *InternalNode) CollectNodes(path []byte, flushfn NodeFlushFn) error {
|
||||
if !bt.dirty {
|
||||
return nil
|
||||
}
|
||||
if bt.left != nil {
|
||||
var p [256]byte
|
||||
copy(p[:], path)
|
||||
|
|
@ -263,6 +271,7 @@ func (bt *InternalNode) CollectNodes(path []byte, flushfn NodeFlushFn) error {
|
|||
}
|
||||
}
|
||||
flushfn(path, bt)
|
||||
bt.dirty = false
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -351,17 +351,20 @@ func TestInternalNodeInsertValuesAtStem(t *testing.T) {
|
|||
|
||||
// TestInternalNodeCollectNodes tests CollectNodes method
|
||||
func TestInternalNodeCollectNodes(t *testing.T) {
|
||||
// Create an internal node with two stem children
|
||||
// Create an internal node with two stem children. All three are
|
||||
// marked dirty to mirror production semantics — see CollectNodes.
|
||||
leftStem := &StemNode{
|
||||
Stem: make([]byte, 31),
|
||||
Values: make([][]byte, 256),
|
||||
depth: 1,
|
||||
dirty: true,
|
||||
}
|
||||
|
||||
rightStem := &StemNode{
|
||||
Stem: make([]byte, 31),
|
||||
Values: make([][]byte, 256),
|
||||
depth: 1,
|
||||
dirty: true,
|
||||
}
|
||||
rightStem.Stem[0] = 0x80
|
||||
|
||||
|
|
@ -369,6 +372,7 @@ func TestInternalNodeCollectNodes(t *testing.T) {
|
|||
depth: 0,
|
||||
left: leftStem,
|
||||
right: rightStem,
|
||||
dirty: true,
|
||||
}
|
||||
|
||||
var collectedPaths [][]byte
|
||||
|
|
@ -405,6 +409,52 @@ func TestInternalNodeCollectNodes(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
// TestInternalNodeCollectNodesSkipsClean verifies clean subtrees are not
|
||||
// flushed. A dirty internal node over clean children only flushes itself;
|
||||
// a fully clean tree flushes nothing.
|
||||
func TestInternalNodeCollectNodesSkipsClean(t *testing.T) {
|
||||
leftStem := &StemNode{
|
||||
Stem: make([]byte, 31),
|
||||
Values: make([][]byte, 256),
|
||||
depth: 1,
|
||||
}
|
||||
rightStem := &StemNode{
|
||||
Stem: make([]byte, 31),
|
||||
Values: make([][]byte, 256),
|
||||
depth: 1,
|
||||
}
|
||||
rightStem.Stem[0] = 0x80
|
||||
|
||||
dirtyParent := &InternalNode{
|
||||
depth: 0,
|
||||
left: leftStem,
|
||||
right: rightStem,
|
||||
dirty: true,
|
||||
}
|
||||
|
||||
var collected []BinaryNode
|
||||
flushFn := func(_ []byte, n BinaryNode) { collected = append(collected, n) }
|
||||
|
||||
if err := dirtyParent.CollectNodes([]byte{1}, flushFn); err != nil {
|
||||
t.Fatalf("CollectNodes: %v", err)
|
||||
}
|
||||
if len(collected) != 1 || collected[0] != dirtyParent {
|
||||
t.Fatalf("expected only the dirty parent to be flushed, got %d nodes", len(collected))
|
||||
}
|
||||
if dirtyParent.dirty {
|
||||
t.Errorf("parent dirty flag should be cleared after flush")
|
||||
}
|
||||
|
||||
// Second call on the same tree should be a no-op: everything is clean.
|
||||
collected = nil
|
||||
if err := dirtyParent.CollectNodes([]byte{1}, flushFn); err != nil {
|
||||
t.Fatalf("CollectNodes (second call): %v", err)
|
||||
}
|
||||
if len(collected) != 0 {
|
||||
t.Errorf("expected no nodes to be flushed on clean tree, got %d", len(collected))
|
||||
}
|
||||
}
|
||||
|
||||
// TestInternalNodeGetHeight tests GetHeight method
|
||||
func TestInternalNodeGetHeight(t *testing.T) {
|
||||
// Create a tree with different heights
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ type StemNode struct {
|
|||
depth int // Depth of the node
|
||||
|
||||
mustRecompute bool // true if the hash needs to be recomputed
|
||||
dirty bool // true if the node's on-disk blob is stale (needs flush)
|
||||
hash common.Hash // cached hash when mustRecompute == false
|
||||
}
|
||||
|
||||
|
|
@ -48,8 +49,11 @@ func (bt *StemNode) Insert(key []byte, value []byte, _ NodeResolverFn, depth int
|
|||
if !bytes.Equal(bt.Stem, key[:StemSize]) {
|
||||
bitStem := bt.Stem[bt.depth/8] >> (7 - (bt.depth % 8)) & 1
|
||||
|
||||
n := &InternalNode{depth: bt.depth, mustRecompute: true}
|
||||
n := &InternalNode{depth: bt.depth, mustRecompute: true, dirty: true}
|
||||
bt.depth++
|
||||
// bt is re-parented under n and sits at a new path — rewrite its blob.
|
||||
bt.mustRecompute = true
|
||||
bt.dirty = true
|
||||
var child, other *BinaryNode
|
||||
if bitStem == 0 {
|
||||
n.left = bt
|
||||
|
|
@ -77,6 +81,7 @@ func (bt *StemNode) Insert(key []byte, value []byte, _ NodeResolverFn, depth int
|
|||
Values: values[:],
|
||||
depth: depth + 1,
|
||||
mustRecompute: true,
|
||||
dirty: true,
|
||||
}
|
||||
}
|
||||
return n, nil
|
||||
|
|
@ -86,6 +91,7 @@ func (bt *StemNode) Insert(key []byte, value []byte, _ NodeResolverFn, depth int
|
|||
}
|
||||
bt.Values[key[StemSize]] = value
|
||||
bt.mustRecompute = true
|
||||
bt.dirty = true
|
||||
return bt, nil
|
||||
}
|
||||
|
||||
|
|
@ -101,6 +107,7 @@ func (bt *StemNode) Copy() BinaryNode {
|
|||
depth: bt.depth,
|
||||
hash: bt.hash,
|
||||
mustRecompute: bt.mustRecompute,
|
||||
dirty: bt.dirty,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -151,10 +158,14 @@ func (bt *StemNode) Hash() common.Hash {
|
|||
return bt.hash
|
||||
}
|
||||
|
||||
// CollectNodes collects all child nodes at a given path, and flushes it
|
||||
// into the provided node collector.
|
||||
// CollectNodes flushes the stem via the collector when dirty; clean stems
|
||||
// are skipped.
|
||||
func (bt *StemNode) CollectNodes(path []byte, flush NodeFlushFn) error {
|
||||
if !bt.dirty {
|
||||
return nil
|
||||
}
|
||||
flush(path, bt)
|
||||
bt.dirty = false
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
@ -172,8 +183,11 @@ func (bt *StemNode) InsertValuesAtStem(key []byte, values [][]byte, _ NodeResolv
|
|||
if !bytes.Equal(bt.Stem, key[:StemSize]) {
|
||||
bitStem := bt.Stem[bt.depth/8] >> (7 - (bt.depth % 8)) & 1
|
||||
|
||||
n := &InternalNode{depth: bt.depth, mustRecompute: true}
|
||||
n := &InternalNode{depth: bt.depth, mustRecompute: true, dirty: true}
|
||||
bt.depth++
|
||||
// bt is re-parented under n and sits at a new path — rewrite its blob.
|
||||
bt.mustRecompute = true
|
||||
bt.dirty = true
|
||||
var child, other *BinaryNode
|
||||
if bitStem == 0 {
|
||||
n.left = bt
|
||||
|
|
@ -199,6 +213,7 @@ func (bt *StemNode) InsertValuesAtStem(key []byte, values [][]byte, _ NodeResolv
|
|||
Values: values,
|
||||
depth: n.depth + 1,
|
||||
mustRecompute: true,
|
||||
dirty: true,
|
||||
}
|
||||
}
|
||||
return n, nil
|
||||
|
|
@ -209,6 +224,7 @@ func (bt *StemNode) InsertValuesAtStem(key []byte, values [][]byte, _ NodeResolv
|
|||
if v != nil {
|
||||
bt.Values[i] = v
|
||||
bt.mustRecompute = true
|
||||
bt.dirty = true
|
||||
}
|
||||
}
|
||||
return bt, nil
|
||||
|
|
|
|||
|
|
@ -379,6 +379,7 @@ func TestStemNodeCollectNodes(t *testing.T) {
|
|||
Stem: stem,
|
||||
Values: values[:],
|
||||
depth: 0,
|
||||
dirty: true,
|
||||
}
|
||||
|
||||
var collectedPaths [][]byte
|
||||
|
|
@ -412,3 +413,44 @@ func TestStemNodeCollectNodes(t *testing.T) {
|
|||
t.Errorf("Path mismatch: expected [0, 1, 0], got %v", collectedPaths[0])
|
||||
}
|
||||
}
|
||||
|
||||
// TestStemNodeCollectNodesSkipsClean verifies that a clean stem is not
|
||||
// flushed, and that flushing a dirty stem clears its dirty flag so that
|
||||
// a subsequent CollectNodes on the same node is a no-op.
|
||||
func TestStemNodeCollectNodesSkipsClean(t *testing.T) {
|
||||
stem := make([]byte, 31)
|
||||
node := &StemNode{
|
||||
Stem: stem,
|
||||
Values: make([][]byte, 256),
|
||||
depth: 0,
|
||||
}
|
||||
|
||||
var collected []BinaryNode
|
||||
flushFn := func(_ []byte, n BinaryNode) { collected = append(collected, n) }
|
||||
|
||||
if err := node.CollectNodes([]byte{0}, flushFn); err != nil {
|
||||
t.Fatalf("CollectNodes on clean stem: %v", err)
|
||||
}
|
||||
if len(collected) != 0 {
|
||||
t.Fatalf("expected clean stem not to be flushed, got %d", len(collected))
|
||||
}
|
||||
|
||||
node.dirty = true
|
||||
if err := node.CollectNodes([]byte{0}, flushFn); err != nil {
|
||||
t.Fatalf("CollectNodes on dirty stem: %v", err)
|
||||
}
|
||||
if len(collected) != 1 {
|
||||
t.Fatalf("expected dirty stem to be flushed once, got %d", len(collected))
|
||||
}
|
||||
if node.dirty {
|
||||
t.Errorf("stem dirty flag should be cleared after flush")
|
||||
}
|
||||
|
||||
collected = nil
|
||||
if err := node.CollectNodes([]byte{0}, flushFn); err != nil {
|
||||
t.Fatalf("CollectNodes after flush: %v", err)
|
||||
}
|
||||
if len(collected) != 0 {
|
||||
t.Errorf("expected no flush on clean stem, got %d", len(collected))
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -779,3 +779,123 @@ func TestGetStorageNonMembershipInternalRoot(t *testing.T) {
|
|||
t.Fatalf("expected nil/zero for non-existent storage, got %x", got)
|
||||
}
|
||||
}
|
||||
|
||||
// commitKeyN derives a distinct 32-byte key from a seed integer. Used by
|
||||
// TestBinaryTrieCommitIncremental and BenchmarkCollectNodes_SparseWrite to
|
||||
// populate a trie with many disjoint stems.
|
||||
func commitKeyN(i int) [HashSize]byte {
|
||||
var k [HashSize]byte
|
||||
binary.BigEndian.PutUint64(k[:8], uint64(i)*0x9e3779b97f4a7c15)
|
||||
binary.BigEndian.PutUint64(k[8:16], uint64(i)*0xc2b2ae3d27d4eb4f)
|
||||
binary.BigEndian.PutUint64(k[16:24], uint64(i)*0x165667b19e3779f9)
|
||||
binary.BigEndian.PutUint64(k[24:32], uint64(i)*0x85ebca77c2b2ae63)
|
||||
return k
|
||||
}
|
||||
|
||||
// TestBinaryTrieCommitIncremental verifies that a second Commit with only a
|
||||
// single modified leaf flushes only the path from that leaf to the root,
|
||||
// not the entire tree.
|
||||
func TestBinaryTrieCommitIncremental(t *testing.T) {
|
||||
tr := &BinaryTrie{
|
||||
root: NewBinaryNode(),
|
||||
tracer: trie.NewPrevalueTracer(),
|
||||
}
|
||||
|
||||
const n = 512
|
||||
keys := make([][HashSize]byte, n)
|
||||
for i := range n {
|
||||
keys[i] = commitKeyN(i + 1)
|
||||
var v [HashSize]byte
|
||||
binary.BigEndian.PutUint64(v[24:], uint64(i+1))
|
||||
var err error
|
||||
tr.root, err = tr.root.Insert(keys[i][:], v[:], nil, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("Insert %d: %v", i, err)
|
||||
}
|
||||
}
|
||||
|
||||
_, ns1 := tr.Commit(false)
|
||||
if len(ns1.Nodes) == 0 {
|
||||
t.Fatal("first Commit produced empty NodeSet")
|
||||
}
|
||||
if len(ns1.Nodes) < n {
|
||||
t.Fatalf("first Commit: expected at least %d nodes, got %d", n, len(ns1.Nodes))
|
||||
}
|
||||
|
||||
// Second Commit on the same trie with no modifications: NodeSet must
|
||||
// be empty because every subtree is clean.
|
||||
_, nsNoop := tr.Commit(false)
|
||||
if len(nsNoop.Nodes) != 0 {
|
||||
t.Fatalf("no-op Commit: expected empty NodeSet, got %d nodes", len(nsNoop.Nodes))
|
||||
}
|
||||
|
||||
// Modify a single leaf's value. Only the path from that leaf to the
|
||||
// root should appear in the next Commit's NodeSet.
|
||||
var newVal [HashSize]byte
|
||||
newVal[0] = 0xff
|
||||
var err error
|
||||
tr.root, err = tr.root.Insert(keys[n/2][:], newVal[:], nil, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("Insert (modify): %v", err)
|
||||
}
|
||||
_, ns2 := tr.Commit(false)
|
||||
|
||||
// Path length for a binary trie of n=512 stems is bounded by the
|
||||
// internal depth at which the modified stem sits. Allow generous
|
||||
// slack: up to 64 nodes is fine, anywhere near n (512) is a regression.
|
||||
if len(ns2.Nodes) == 0 {
|
||||
t.Fatal("modified Commit produced empty NodeSet")
|
||||
}
|
||||
if len(ns2.Nodes) > 64 {
|
||||
t.Fatalf("modified Commit: expected small NodeSet, got %d nodes (first Commit had %d)", len(ns2.Nodes), len(ns1.Nodes))
|
||||
}
|
||||
if len(ns2.Nodes) >= len(ns1.Nodes) {
|
||||
t.Fatalf("expected second NodeSet (%d) to be smaller than first (%d)", len(ns2.Nodes), len(ns1.Nodes))
|
||||
}
|
||||
}
|
||||
|
||||
// BenchmarkCollectNodes_SparseWrite measures Commit cost when only one leaf
|
||||
// changes between blocks — the common case for state updates. After warm-up
|
||||
// (populate + initial Commit), each iteration modifies a single leaf and
|
||||
// re-Commits. Under the skip-clean optimization, each iteration flushes
|
||||
// only the root-to-leaf path; pre-fix behavior would re-flush the entire
|
||||
// tree every iteration.
|
||||
func BenchmarkCollectNodes_SparseWrite(b *testing.B) {
|
||||
const n = 10_000
|
||||
|
||||
tr := &BinaryTrie{
|
||||
root: NewBinaryNode(),
|
||||
tracer: trie.NewPrevalueTracer(),
|
||||
}
|
||||
keys := make([][HashSize]byte, n)
|
||||
for i := range n {
|
||||
keys[i] = commitKeyN(i + 1)
|
||||
var v [HashSize]byte
|
||||
binary.BigEndian.PutUint64(v[24:], uint64(i+1))
|
||||
var err error
|
||||
tr.root, err = tr.root.Insert(keys[i][:], v[:], nil, 0)
|
||||
if err != nil {
|
||||
b.Fatalf("Insert %d: %v", i, err)
|
||||
}
|
||||
}
|
||||
// Flush the initial tree so subsequent Commits reflect the
|
||||
// single-modification workload we want to measure.
|
||||
_, _ = tr.Commit(false)
|
||||
|
||||
var newVal [HashSize]byte
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
idx := i % n
|
||||
binary.BigEndian.PutUint64(newVal[24:], uint64(i+1))
|
||||
var err error
|
||||
tr.root, err = tr.root.Insert(keys[idx][:], newVal[:], nil, 0)
|
||||
if err != nil {
|
||||
b.Fatalf("Insert at iter %d: %v", i, err)
|
||||
}
|
||||
_, ns := tr.Commit(false)
|
||||
if len(ns.Nodes) == 0 {
|
||||
b.Fatalf("iter %d: empty NodeSet", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue