diff --git a/trie/bintrie/internal_node.go b/trie/bintrie/internal_node.go index dd6aa4b85c..dfce40afe5 100644 --- a/trie/bintrie/internal_node.go +++ b/trie/bintrie/internal_node.go @@ -34,9 +34,12 @@ func keyToPath(depth int, key []byte) ([]byte, error) { return path, nil } +// Invariant: dirty=false implies mustRecompute=false. Every mutation that +// invalidates the cached hash MUST also mark the blob for re-flush. type InternalNode struct { left, right NodeRef depth uint8 - mustRecompute bool + mustRecompute bool // hash is stale (cleared by Hash) + dirty bool // on-disk blob is stale (cleared by CollectNodes) hash common.Hash } diff --git a/trie/bintrie/node_store.go b/trie/bintrie/node_store.go index 291b968ae1..eb8eb8240a 100644 --- a/trie/bintrie/node_store.go +++ b/trie/bintrie/node_store.go @@ -84,6 +84,7 @@ func (s *NodeStore) newInternalRef(depth int) NodeRef { n := s.getInternal(idx) n.depth = uint8(depth) n.mustRecompute = true + n.dirty = true return MakeRef(KindInternal, idx) } @@ -119,6 +120,7 @@ func (s *NodeStore) newStemRef(stem []byte, depth int) NodeRef { copy(sn.Stem[:], stem[:StemSize]) sn.depth = uint8(depth) sn.mustRecompute = true + sn.dirty = true return MakeRef(KindStem, idx) } diff --git a/trie/bintrie/stem_node.go b/trie/bintrie/stem_node.go index f268b6be74..a73eb8f39d 100644 --- a/trie/bintrie/stem_node.go +++ b/trie/bintrie/stem_node.go @@ -23,6 +23,9 @@ import ( ) // StemNode holds up to 256 values sharing a 31-byte stem, packed via bitmap. +// +// Invariant: dirty=false implies mustRecompute=false. Every mutation that +// invalidates the cached hash MUST also mark the blob for re-flush. type StemNode struct { Stem [StemSize]byte bitmap [StemBitmapSize]byte @@ -31,7 +34,8 @@ type StemNode struct { depth uint8 shared bool // true if valueData is shared with serialized input - mustRecompute bool // true if the hash needs to be recomputed + mustRecompute bool // hash is stale (cleared by Hash) + dirty bool // on-disk blob is stale (cleared by CollectNodes) hash common.Hash // cached hash when mustRecompute == false } diff --git a/trie/bintrie/store_commit.go b/trie/bintrie/store_commit.go index 8ac296c083..606a2b32a4 100644 --- a/trie/bintrie/store_commit.go +++ b/trie/bintrie/store_commit.go @@ -124,17 +124,19 @@ func (s *NodeStore) SerializeNode(ref NodeRef) []byte { var errInvalidSerializedLength = errors.New("invalid serialized node length") -// DeserializeNode deserializes a node from bytes, recomputing its hash. +// DeserializeNode deserializes a node from bytes, recomputing its hash. The +// returned node is marked dirty (provenance unknown, safe re-flush default). func (s *NodeStore) DeserializeNode(serialized []byte, depth int) (NodeRef, error) { - return s.deserializeNode(serialized, depth, common.Hash{}, true) + return s.deserializeNode(serialized, depth, common.Hash{}, true, true) } -// DeserializeNodeWithHash deserializes a node, using the provided hash. +// DeserializeNodeWithHash deserializes a node whose hash is already known and +// whose blob is already on disk (mustRecompute=false, dirty=false). func (s *NodeStore) DeserializeNodeWithHash(serialized []byte, depth int, hn common.Hash) (NodeRef, error) { - return s.deserializeNode(serialized, depth, hn, false) + return s.deserializeNode(serialized, depth, hn, false, false) } -func (s *NodeStore) deserializeNode(serialized []byte, depth int, hn common.Hash, mustRecompute bool) (NodeRef, error) { +func (s *NodeStore) deserializeNode(serialized []byte, depth int, hn common.Hash, mustRecompute, dirty bool) (NodeRef, error) { if len(serialized) == 0 { return EmptyRef, nil } @@ -164,6 +166,7 @@ func (s *NodeStore) deserializeNode(serialized []byte, depth int, hn common.Hash node.hash = hn node.mustRecompute = false } + node.dirty = dirty return ref, nil case nodeTypeStem: @@ -191,6 +194,7 @@ func (s *NodeStore) deserializeNode(serialized []byte, depth int, hn common.Hash sn.depth = uint8(depth) sn.hash = hn sn.mustRecompute = mustRecompute + sn.dirty = dirty return MakeRef(KindStem, stemIdx), nil default: @@ -198,13 +202,18 @@ func (s *NodeStore) deserializeNode(serialized []byte, depth int, hn common.Hash } } -// CollectNodes flushes every node via flushfn in post-order. +// CollectNodes flushes every node that needs flushing via flushfn in post-order. +// Invariant: any ancestor of a node that needs flushing is itself marked, so a +// clean root means the whole subtree is clean. func (s *NodeStore) CollectNodes(ref NodeRef, path []byte, flushfn NodeFlushFn) error { switch ref.Kind() { case KindEmpty: return nil case KindInternal: node := s.getInternal(ref.Index()) + if !node.dirty { + return nil + } leftPath := make([]byte, len(path)+1) copy(leftPath, path) leftPath[len(path)] = 0 @@ -218,9 +227,15 @@ func (s *NodeStore) CollectNodes(ref NodeRef, path []byte, flushfn NodeFlushFn) return err } flushfn(path, s.ComputeHash(ref), s.SerializeNode(ref)) + node.dirty = false return nil case KindStem: + sn := s.getStem(ref.Index()) + if !sn.dirty { + return nil + } flushfn(path, s.ComputeHash(ref), s.SerializeNode(ref)) + sn.dirty = false return nil case KindHashed: return nil // Already committed diff --git a/trie/bintrie/store_ops.go b/trie/bintrie/store_ops.go index eea9c01a70..f5dc9c8035 100644 --- a/trie/bintrie/store_ops.go +++ b/trie/bintrie/store_ops.go @@ -193,6 +193,7 @@ func (s *NodeStore) InsertSingle(stem []byte, suffix byte, value []byte, resolve if sn.Stem == [StemSize]byte(stem[:StemSize]) { sn.setValue(suffix, value) sn.mustRecompute = true + sn.dirty = true return nil } newRoot := s.splitStemInsert(s.root, stem, suffix, value, int(sn.depth)) @@ -218,6 +219,7 @@ func (s *NodeStore) insertSingleInternal(stem []byte, suffix byte, value []byte, case KindInternal: node := s.getInternal(cur.Index()) node.mustRecompute = true + node.dirty = true bit := stem[node.depth/8] >> (7 - (node.depth % 8)) & 1 pathStack[pathLen] = pathEntry{internalIdx: cur.Index(), isLeft: bit == 0} pathLen++ @@ -232,6 +234,7 @@ func (s *NodeStore) insertSingleInternal(stem []byte, suffix byte, value []byte, if sn.Stem == [StemSize]byte(stem[:StemSize]) { sn.setValue(suffix, value) sn.mustRecompute = true + sn.dirty = true return nil } // Different stem — split @@ -338,6 +341,7 @@ func (s *NodeStore) splitStemInsert(existingRef NodeRef, newStem []byte, suffix copy(newSn.Stem[:], newStem[:StemSize]) newSn.depth = uint8(existingDepth + 1) newSn.mustRecompute = true + newSn.dirty = true newSn.setValue(suffix, value) newStemRef := MakeRef(KindStem, newStemIdx) @@ -426,6 +430,7 @@ func (s *NodeStore) insertValuesAtStem(ref NodeRef, stem []byte, values [][]byte node.right = newChild } node.mustRecompute = true + node.dirty = true return ref, nil case KindStem: @@ -436,6 +441,7 @@ func (s *NodeStore) insertValuesAtStem(ref NodeRef, stem []byte, values [][]byte if v != nil { sn.setValue(byte(i), v) sn.mustRecompute = true + sn.dirty = true } } return ref, nil @@ -470,6 +476,7 @@ func (s *NodeStore) insertValuesAtStem(ref NodeRef, stem []byte, values [][]byte copy(sn.Stem[:], stem[:StemSize]) sn.depth = uint8(depth) sn.mustRecompute = true + sn.dirty = true for i, v := range values { if v != nil { sn.count++ @@ -526,6 +533,7 @@ func (s *NodeStore) splitStemValuesInsert(existingRef NodeRef, newStem []byte, v copy(newSn.Stem[:], newStem[:StemSize]) newSn.depth = nNode.depth + 1 newSn.mustRecompute = true + newSn.dirty = true for i, v := range values { if v != nil { newSn.setValue(byte(i), v) diff --git a/trie/bintrie/trie_test.go b/trie/bintrie/trie_test.go index a122b2d152..0b8c2b99fc 100644 --- a/trie/bintrie/trie_test.go +++ b/trie/bintrie/trie_test.go @@ -762,36 +762,29 @@ func TestGetStorageNonMembershipInternalRoot(t *testing.T) { } } -// commitKeyN derives a distinct 32-byte key from a seed integer. Used by -// TestBinaryTrieCommitIncremental and BenchmarkCollectNodes_SparseWrite to -// populate a trie with many disjoint stems. -func commitKeyN(i int) [HashSize]byte { - var k [HashSize]byte - binary.BigEndian.PutUint64(k[:8], uint64(i)*0x9e3779b97f4a7c15) - binary.BigEndian.PutUint64(k[8:16], uint64(i)*0xc2b2ae3d27d4eb4f) - binary.BigEndian.PutUint64(k[16:24], uint64(i)*0x165667b19e3779f9) - binary.BigEndian.PutUint64(k[24:32], uint64(i)*0x85ebca77c2b2ae63) - return k -} - -// TestBinaryTrieCommitIncremental verifies that a second Commit with only a -// single modified leaf flushes only the path from that leaf to the root, -// not the entire tree. -func TestBinaryTrieCommitIncremental(t *testing.T) { +// TestCommitSkipCleanSubtrees verifies that CollectNodes short-circuits on +// clean subtrees. First Commit flushes every resolved node; a follow-up +// Commit with no modifications flushes nothing; a single-leaf modification +// flushes only the root-to-leaf path. +func TestCommitSkipCleanSubtrees(t *testing.T) { tr := &BinaryTrie{ - root: NewBinaryNode(), + store: NewNodeStore(), tracer: trie.NewPrevalueTracer(), } - - const n = 512 - keys := make([][HashSize]byte, n) + const n = 200 + key := func(i int) [HashSize]byte { + var k [HashSize]byte + binary.BigEndian.PutUint64(k[:8], uint64(i+1)*0x9e3779b97f4a7c15) + binary.BigEndian.PutUint64(k[8:16], uint64(i+1)*0xc2b2ae3d27d4eb4f) + binary.BigEndian.PutUint64(k[16:24], uint64(i+1)*0x165667b19e3779f9) + binary.BigEndian.PutUint64(k[24:32], uint64(i+1)*0x85ebca77c2b2ae63) + return k + } for i := range n { - keys[i] = commitKeyN(i + 1) + k := key(i) var v [HashSize]byte binary.BigEndian.PutUint64(v[24:], uint64(i+1)) - var err error - tr.root, err = tr.root.Insert(keys[i][:], v[:], nil, 0) - if err != nil { + if err := tr.store.Insert(k[:], v[:], nil); err != nil { t.Fatalf("Insert %d: %v", i, err) } } @@ -800,84 +793,27 @@ func TestBinaryTrieCommitIncremental(t *testing.T) { if len(ns1.Nodes) == 0 { t.Fatal("first Commit produced empty NodeSet") } - if len(ns1.Nodes) < n { - t.Fatalf("first Commit: expected at least %d nodes, got %d", n, len(ns1.Nodes)) - } - // Second Commit on the same trie with no modifications: NodeSet must - // be empty because every subtree is clean. _, nsNoop := tr.Commit(false) if len(nsNoop.Nodes) != 0 { - t.Fatalf("no-op Commit: expected empty NodeSet, got %d nodes", len(nsNoop.Nodes)) + t.Fatalf("no-op Commit: expected empty NodeSet, got %d", len(nsNoop.Nodes)) } - // Modify a single leaf's value. Only the path from that leaf to the - // root should appear in the next Commit's NodeSet. + // Modify a single leaf — only the root-to-leaf path should flush. + k := key(n / 2) var newVal [HashSize]byte newVal[0] = 0xff - var err error - tr.root, err = tr.root.Insert(keys[n/2][:], newVal[:], nil, 0) - if err != nil { + if err := tr.store.Insert(k[:], newVal[:], nil); err != nil { t.Fatalf("Insert (modify): %v", err) } _, ns2 := tr.Commit(false) - - // Path length for a binary trie of n=512 stems is bounded by the - // internal depth at which the modified stem sits. Allow generous - // slack: up to 64 nodes is fine, anywhere near n (512) is a regression. if len(ns2.Nodes) == 0 { t.Fatal("modified Commit produced empty NodeSet") } - if len(ns2.Nodes) > 64 { - t.Fatalf("modified Commit: expected small NodeSet, got %d nodes (first Commit had %d)", len(ns2.Nodes), len(ns1.Nodes)) + if len(ns2.Nodes) > 32 { + t.Fatalf("modified Commit: expected ≤32 nodes (path+stem), got %d", len(ns2.Nodes)) } if len(ns2.Nodes) >= len(ns1.Nodes) { t.Fatalf("expected second NodeSet (%d) to be smaller than first (%d)", len(ns2.Nodes), len(ns1.Nodes)) } } - -// BenchmarkCollectNodes_SparseWrite measures Commit cost when only one leaf -// changes between blocks — the common case for state updates. After warm-up -// (populate + initial Commit), each iteration modifies a single leaf and -// re-Commits. Under the skip-clean optimization, each iteration flushes -// only the root-to-leaf path; pre-fix behavior would re-flush the entire -// tree every iteration. -func BenchmarkCollectNodes_SparseWrite(b *testing.B) { - const n = 10_000 - - tr := &BinaryTrie{ - root: NewBinaryNode(), - tracer: trie.NewPrevalueTracer(), - } - keys := make([][HashSize]byte, n) - for i := range n { - keys[i] = commitKeyN(i + 1) - var v [HashSize]byte - binary.BigEndian.PutUint64(v[24:], uint64(i+1)) - var err error - tr.root, err = tr.root.Insert(keys[i][:], v[:], nil, 0) - if err != nil { - b.Fatalf("Insert %d: %v", i, err) - } - } - // Flush the initial tree so subsequent Commits reflect the - // single-modification workload we want to measure. - _, _ = tr.Commit(false) - - var newVal [HashSize]byte - b.ReportAllocs() - b.ResetTimer() - for i := 0; i < b.N; i++ { - idx := i % n - binary.BigEndian.PutUint64(newVal[24:], uint64(i+1)) - var err error - tr.root, err = tr.root.Insert(keys[idx][:], newVal[:], nil, 0) - if err != nil { - b.Fatalf("Insert at iter %d: %v", i, err) - } - _, ns := tr.Commit(false) - if len(ns.Nodes) == 0 { - b.Fatalf("iter %d: empty NodeSet", i) - } - } -}