mirror of
https://github.com/ethereum/go-ethereum.git
synced 2026-06-12 09:51:36 +00:00
trie/bintrie: port dirty flag + CollectNodes skip-clean from master
Master added (via PR #34754) a dirty bool to InternalNode/StemNode plus a CollectNodes short-circuit that skips clean subtrees — the arena branch diverged before that landed. Port the semantics onto the arena shape: - Add dirty bool to InternalNode and StemNode. - Wire dirty=true alongside every existing mustRecompute=true setter in node_store.go (newInternalRef, newStemRef) and store_ops.go (8 mutation sites across InsertSingle/insertSingleInternal/InsertValuesAtStem/ insertValuesAtStem/splitStemInsert/splitStemValuesInsert). - Add 'if !node.dirty { return nil }' gate at the top of CollectNodes for both KindInternal and KindStem; clear dirty after flushfn runs. - Plumb a dirty parameter through deserializeNode; DeserializeNode passes dirty=true (safe default), DeserializeNodeWithHash passes dirty=false (loaded from disk, blob matches). The arena test in trie_test.go that was auto-merged from master used master-shape struct literals (tr.root, NewBinaryNode) that don't exist on arena; delete those and replace with TestCommitSkipCleanSubtrees, an arena-native version that asserts first-Commit flushes all nodes, no-op Commit flushes none, and single-leaf Commit flushes only the root-to-leaf path.
This commit is contained in:
parent
1a37d82231
commit
939b36345f
6 changed files with 63 additions and 95 deletions
|
|
@ -34,9 +34,12 @@ func keyToPath(depth int, key []byte) ([]byte, error) {
|
|||
return path, nil
|
||||
}
|
||||
|
||||
// Invariant: dirty=false implies mustRecompute=false. Every mutation that
|
||||
// invalidates the cached hash MUST also mark the blob for re-flush.
|
||||
type InternalNode struct {
|
||||
left, right NodeRef
|
||||
depth uint8
|
||||
mustRecompute bool
|
||||
mustRecompute bool // hash is stale (cleared by Hash)
|
||||
dirty bool // on-disk blob is stale (cleared by CollectNodes)
|
||||
hash common.Hash
|
||||
}
|
||||
|
|
|
|||
|
|
@ -84,6 +84,7 @@ func (s *NodeStore) newInternalRef(depth int) NodeRef {
|
|||
n := s.getInternal(idx)
|
||||
n.depth = uint8(depth)
|
||||
n.mustRecompute = true
|
||||
n.dirty = true
|
||||
return MakeRef(KindInternal, idx)
|
||||
}
|
||||
|
||||
|
|
@ -119,6 +120,7 @@ func (s *NodeStore) newStemRef(stem []byte, depth int) NodeRef {
|
|||
copy(sn.Stem[:], stem[:StemSize])
|
||||
sn.depth = uint8(depth)
|
||||
sn.mustRecompute = true
|
||||
sn.dirty = true
|
||||
return MakeRef(KindStem, idx)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -23,6 +23,9 @@ import (
|
|||
)
|
||||
|
||||
// StemNode holds up to 256 values sharing a 31-byte stem, packed via bitmap.
|
||||
//
|
||||
// Invariant: dirty=false implies mustRecompute=false. Every mutation that
|
||||
// invalidates the cached hash MUST also mark the blob for re-flush.
|
||||
type StemNode struct {
|
||||
Stem [StemSize]byte
|
||||
bitmap [StemBitmapSize]byte
|
||||
|
|
@ -31,7 +34,8 @@ type StemNode struct {
|
|||
depth uint8
|
||||
shared bool // true if valueData is shared with serialized input
|
||||
|
||||
mustRecompute bool // true if the hash needs to be recomputed
|
||||
mustRecompute bool // hash is stale (cleared by Hash)
|
||||
dirty bool // on-disk blob is stale (cleared by CollectNodes)
|
||||
hash common.Hash // cached hash when mustRecompute == false
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -124,17 +124,19 @@ func (s *NodeStore) SerializeNode(ref NodeRef) []byte {
|
|||
|
||||
var errInvalidSerializedLength = errors.New("invalid serialized node length")
|
||||
|
||||
// DeserializeNode deserializes a node from bytes, recomputing its hash.
|
||||
// DeserializeNode deserializes a node from bytes, recomputing its hash. The
|
||||
// returned node is marked dirty (provenance unknown, safe re-flush default).
|
||||
func (s *NodeStore) DeserializeNode(serialized []byte, depth int) (NodeRef, error) {
|
||||
return s.deserializeNode(serialized, depth, common.Hash{}, true)
|
||||
return s.deserializeNode(serialized, depth, common.Hash{}, true, true)
|
||||
}
|
||||
|
||||
// DeserializeNodeWithHash deserializes a node, using the provided hash.
|
||||
// DeserializeNodeWithHash deserializes a node whose hash is already known and
|
||||
// whose blob is already on disk (mustRecompute=false, dirty=false).
|
||||
func (s *NodeStore) DeserializeNodeWithHash(serialized []byte, depth int, hn common.Hash) (NodeRef, error) {
|
||||
return s.deserializeNode(serialized, depth, hn, false)
|
||||
return s.deserializeNode(serialized, depth, hn, false, false)
|
||||
}
|
||||
|
||||
func (s *NodeStore) deserializeNode(serialized []byte, depth int, hn common.Hash, mustRecompute bool) (NodeRef, error) {
|
||||
func (s *NodeStore) deserializeNode(serialized []byte, depth int, hn common.Hash, mustRecompute, dirty bool) (NodeRef, error) {
|
||||
if len(serialized) == 0 {
|
||||
return EmptyRef, nil
|
||||
}
|
||||
|
|
@ -164,6 +166,7 @@ func (s *NodeStore) deserializeNode(serialized []byte, depth int, hn common.Hash
|
|||
node.hash = hn
|
||||
node.mustRecompute = false
|
||||
}
|
||||
node.dirty = dirty
|
||||
return ref, nil
|
||||
|
||||
case nodeTypeStem:
|
||||
|
|
@ -191,6 +194,7 @@ func (s *NodeStore) deserializeNode(serialized []byte, depth int, hn common.Hash
|
|||
sn.depth = uint8(depth)
|
||||
sn.hash = hn
|
||||
sn.mustRecompute = mustRecompute
|
||||
sn.dirty = dirty
|
||||
return MakeRef(KindStem, stemIdx), nil
|
||||
|
||||
default:
|
||||
|
|
@ -198,13 +202,18 @@ func (s *NodeStore) deserializeNode(serialized []byte, depth int, hn common.Hash
|
|||
}
|
||||
}
|
||||
|
||||
// CollectNodes flushes every node via flushfn in post-order.
|
||||
// CollectNodes flushes every node that needs flushing via flushfn in post-order.
|
||||
// Invariant: any ancestor of a node that needs flushing is itself marked, so a
|
||||
// clean root means the whole subtree is clean.
|
||||
func (s *NodeStore) CollectNodes(ref NodeRef, path []byte, flushfn NodeFlushFn) error {
|
||||
switch ref.Kind() {
|
||||
case KindEmpty:
|
||||
return nil
|
||||
case KindInternal:
|
||||
node := s.getInternal(ref.Index())
|
||||
if !node.dirty {
|
||||
return nil
|
||||
}
|
||||
leftPath := make([]byte, len(path)+1)
|
||||
copy(leftPath, path)
|
||||
leftPath[len(path)] = 0
|
||||
|
|
@ -218,9 +227,15 @@ func (s *NodeStore) CollectNodes(ref NodeRef, path []byte, flushfn NodeFlushFn)
|
|||
return err
|
||||
}
|
||||
flushfn(path, s.ComputeHash(ref), s.SerializeNode(ref))
|
||||
node.dirty = false
|
||||
return nil
|
||||
case KindStem:
|
||||
sn := s.getStem(ref.Index())
|
||||
if !sn.dirty {
|
||||
return nil
|
||||
}
|
||||
flushfn(path, s.ComputeHash(ref), s.SerializeNode(ref))
|
||||
sn.dirty = false
|
||||
return nil
|
||||
case KindHashed:
|
||||
return nil // Already committed
|
||||
|
|
|
|||
|
|
@ -193,6 +193,7 @@ func (s *NodeStore) InsertSingle(stem []byte, suffix byte, value []byte, resolve
|
|||
if sn.Stem == [StemSize]byte(stem[:StemSize]) {
|
||||
sn.setValue(suffix, value)
|
||||
sn.mustRecompute = true
|
||||
sn.dirty = true
|
||||
return nil
|
||||
}
|
||||
newRoot := s.splitStemInsert(s.root, stem, suffix, value, int(sn.depth))
|
||||
|
|
@ -218,6 +219,7 @@ func (s *NodeStore) insertSingleInternal(stem []byte, suffix byte, value []byte,
|
|||
case KindInternal:
|
||||
node := s.getInternal(cur.Index())
|
||||
node.mustRecompute = true
|
||||
node.dirty = true
|
||||
bit := stem[node.depth/8] >> (7 - (node.depth % 8)) & 1
|
||||
pathStack[pathLen] = pathEntry{internalIdx: cur.Index(), isLeft: bit == 0}
|
||||
pathLen++
|
||||
|
|
@ -232,6 +234,7 @@ func (s *NodeStore) insertSingleInternal(stem []byte, suffix byte, value []byte,
|
|||
if sn.Stem == [StemSize]byte(stem[:StemSize]) {
|
||||
sn.setValue(suffix, value)
|
||||
sn.mustRecompute = true
|
||||
sn.dirty = true
|
||||
return nil
|
||||
}
|
||||
// Different stem — split
|
||||
|
|
@ -338,6 +341,7 @@ func (s *NodeStore) splitStemInsert(existingRef NodeRef, newStem []byte, suffix
|
|||
copy(newSn.Stem[:], newStem[:StemSize])
|
||||
newSn.depth = uint8(existingDepth + 1)
|
||||
newSn.mustRecompute = true
|
||||
newSn.dirty = true
|
||||
newSn.setValue(suffix, value)
|
||||
newStemRef := MakeRef(KindStem, newStemIdx)
|
||||
|
||||
|
|
@ -426,6 +430,7 @@ func (s *NodeStore) insertValuesAtStem(ref NodeRef, stem []byte, values [][]byte
|
|||
node.right = newChild
|
||||
}
|
||||
node.mustRecompute = true
|
||||
node.dirty = true
|
||||
return ref, nil
|
||||
|
||||
case KindStem:
|
||||
|
|
@ -436,6 +441,7 @@ func (s *NodeStore) insertValuesAtStem(ref NodeRef, stem []byte, values [][]byte
|
|||
if v != nil {
|
||||
sn.setValue(byte(i), v)
|
||||
sn.mustRecompute = true
|
||||
sn.dirty = true
|
||||
}
|
||||
}
|
||||
return ref, nil
|
||||
|
|
@ -470,6 +476,7 @@ func (s *NodeStore) insertValuesAtStem(ref NodeRef, stem []byte, values [][]byte
|
|||
copy(sn.Stem[:], stem[:StemSize])
|
||||
sn.depth = uint8(depth)
|
||||
sn.mustRecompute = true
|
||||
sn.dirty = true
|
||||
for i, v := range values {
|
||||
if v != nil {
|
||||
sn.count++
|
||||
|
|
@ -526,6 +533,7 @@ func (s *NodeStore) splitStemValuesInsert(existingRef NodeRef, newStem []byte, v
|
|||
copy(newSn.Stem[:], newStem[:StemSize])
|
||||
newSn.depth = nNode.depth + 1
|
||||
newSn.mustRecompute = true
|
||||
newSn.dirty = true
|
||||
for i, v := range values {
|
||||
if v != nil {
|
||||
newSn.setValue(byte(i), v)
|
||||
|
|
|
|||
|
|
@ -762,36 +762,29 @@ func TestGetStorageNonMembershipInternalRoot(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
// commitKeyN derives a distinct 32-byte key from a seed integer. Used by
|
||||
// TestBinaryTrieCommitIncremental and BenchmarkCollectNodes_SparseWrite to
|
||||
// populate a trie with many disjoint stems.
|
||||
func commitKeyN(i int) [HashSize]byte {
|
||||
var k [HashSize]byte
|
||||
binary.BigEndian.PutUint64(k[:8], uint64(i)*0x9e3779b97f4a7c15)
|
||||
binary.BigEndian.PutUint64(k[8:16], uint64(i)*0xc2b2ae3d27d4eb4f)
|
||||
binary.BigEndian.PutUint64(k[16:24], uint64(i)*0x165667b19e3779f9)
|
||||
binary.BigEndian.PutUint64(k[24:32], uint64(i)*0x85ebca77c2b2ae63)
|
||||
return k
|
||||
}
|
||||
|
||||
// TestBinaryTrieCommitIncremental verifies that a second Commit with only a
|
||||
// single modified leaf flushes only the path from that leaf to the root,
|
||||
// not the entire tree.
|
||||
func TestBinaryTrieCommitIncremental(t *testing.T) {
|
||||
// TestCommitSkipCleanSubtrees verifies that CollectNodes short-circuits on
|
||||
// clean subtrees. First Commit flushes every resolved node; a follow-up
|
||||
// Commit with no modifications flushes nothing; a single-leaf modification
|
||||
// flushes only the root-to-leaf path.
|
||||
func TestCommitSkipCleanSubtrees(t *testing.T) {
|
||||
tr := &BinaryTrie{
|
||||
root: NewBinaryNode(),
|
||||
store: NewNodeStore(),
|
||||
tracer: trie.NewPrevalueTracer(),
|
||||
}
|
||||
|
||||
const n = 512
|
||||
keys := make([][HashSize]byte, n)
|
||||
const n = 200
|
||||
key := func(i int) [HashSize]byte {
|
||||
var k [HashSize]byte
|
||||
binary.BigEndian.PutUint64(k[:8], uint64(i+1)*0x9e3779b97f4a7c15)
|
||||
binary.BigEndian.PutUint64(k[8:16], uint64(i+1)*0xc2b2ae3d27d4eb4f)
|
||||
binary.BigEndian.PutUint64(k[16:24], uint64(i+1)*0x165667b19e3779f9)
|
||||
binary.BigEndian.PutUint64(k[24:32], uint64(i+1)*0x85ebca77c2b2ae63)
|
||||
return k
|
||||
}
|
||||
for i := range n {
|
||||
keys[i] = commitKeyN(i + 1)
|
||||
k := key(i)
|
||||
var v [HashSize]byte
|
||||
binary.BigEndian.PutUint64(v[24:], uint64(i+1))
|
||||
var err error
|
||||
tr.root, err = tr.root.Insert(keys[i][:], v[:], nil, 0)
|
||||
if err != nil {
|
||||
if err := tr.store.Insert(k[:], v[:], nil); err != nil {
|
||||
t.Fatalf("Insert %d: %v", i, err)
|
||||
}
|
||||
}
|
||||
|
|
@ -800,84 +793,27 @@ func TestBinaryTrieCommitIncremental(t *testing.T) {
|
|||
if len(ns1.Nodes) == 0 {
|
||||
t.Fatal("first Commit produced empty NodeSet")
|
||||
}
|
||||
if len(ns1.Nodes) < n {
|
||||
t.Fatalf("first Commit: expected at least %d nodes, got %d", n, len(ns1.Nodes))
|
||||
}
|
||||
|
||||
// Second Commit on the same trie with no modifications: NodeSet must
|
||||
// be empty because every subtree is clean.
|
||||
_, nsNoop := tr.Commit(false)
|
||||
if len(nsNoop.Nodes) != 0 {
|
||||
t.Fatalf("no-op Commit: expected empty NodeSet, got %d nodes", len(nsNoop.Nodes))
|
||||
t.Fatalf("no-op Commit: expected empty NodeSet, got %d", len(nsNoop.Nodes))
|
||||
}
|
||||
|
||||
// Modify a single leaf's value. Only the path from that leaf to the
|
||||
// root should appear in the next Commit's NodeSet.
|
||||
// Modify a single leaf — only the root-to-leaf path should flush.
|
||||
k := key(n / 2)
|
||||
var newVal [HashSize]byte
|
||||
newVal[0] = 0xff
|
||||
var err error
|
||||
tr.root, err = tr.root.Insert(keys[n/2][:], newVal[:], nil, 0)
|
||||
if err != nil {
|
||||
if err := tr.store.Insert(k[:], newVal[:], nil); err != nil {
|
||||
t.Fatalf("Insert (modify): %v", err)
|
||||
}
|
||||
_, ns2 := tr.Commit(false)
|
||||
|
||||
// Path length for a binary trie of n=512 stems is bounded by the
|
||||
// internal depth at which the modified stem sits. Allow generous
|
||||
// slack: up to 64 nodes is fine, anywhere near n (512) is a regression.
|
||||
if len(ns2.Nodes) == 0 {
|
||||
t.Fatal("modified Commit produced empty NodeSet")
|
||||
}
|
||||
if len(ns2.Nodes) > 64 {
|
||||
t.Fatalf("modified Commit: expected small NodeSet, got %d nodes (first Commit had %d)", len(ns2.Nodes), len(ns1.Nodes))
|
||||
if len(ns2.Nodes) > 32 {
|
||||
t.Fatalf("modified Commit: expected ≤32 nodes (path+stem), got %d", len(ns2.Nodes))
|
||||
}
|
||||
if len(ns2.Nodes) >= len(ns1.Nodes) {
|
||||
t.Fatalf("expected second NodeSet (%d) to be smaller than first (%d)", len(ns2.Nodes), len(ns1.Nodes))
|
||||
}
|
||||
}
|
||||
|
||||
// BenchmarkCollectNodes_SparseWrite measures Commit cost when only one leaf
|
||||
// changes between blocks — the common case for state updates. After warm-up
|
||||
// (populate + initial Commit), each iteration modifies a single leaf and
|
||||
// re-Commits. Under the skip-clean optimization, each iteration flushes
|
||||
// only the root-to-leaf path; pre-fix behavior would re-flush the entire
|
||||
// tree every iteration.
|
||||
func BenchmarkCollectNodes_SparseWrite(b *testing.B) {
|
||||
const n = 10_000
|
||||
|
||||
tr := &BinaryTrie{
|
||||
root: NewBinaryNode(),
|
||||
tracer: trie.NewPrevalueTracer(),
|
||||
}
|
||||
keys := make([][HashSize]byte, n)
|
||||
for i := range n {
|
||||
keys[i] = commitKeyN(i + 1)
|
||||
var v [HashSize]byte
|
||||
binary.BigEndian.PutUint64(v[24:], uint64(i+1))
|
||||
var err error
|
||||
tr.root, err = tr.root.Insert(keys[i][:], v[:], nil, 0)
|
||||
if err != nil {
|
||||
b.Fatalf("Insert %d: %v", i, err)
|
||||
}
|
||||
}
|
||||
// Flush the initial tree so subsequent Commits reflect the
|
||||
// single-modification workload we want to measure.
|
||||
_, _ = tr.Commit(false)
|
||||
|
||||
var newVal [HashSize]byte
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
idx := i % n
|
||||
binary.BigEndian.PutUint64(newVal[24:], uint64(i+1))
|
||||
var err error
|
||||
tr.root, err = tr.root.Insert(keys[idx][:], newVal[:], nil, 0)
|
||||
if err != nil {
|
||||
b.Fatalf("Insert at iter %d: %v", i, err)
|
||||
}
|
||||
_, ns := tr.Commit(false)
|
||||
if len(ns.Nodes) == 0 {
|
||||
b.Fatalf("iter %d: empty NodeSet", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue