From a8b841c0a04319eb58b8845ec61a813c73e31493 Mon Sep 17 00:00:00 2001 From: Daniel Liu <139250065@qq.com> Date: Wed, 20 Aug 2025 16:28:58 +0800 Subject: [PATCH] trie: implement NodeBlob API for trie iterator #24391 (#1084) This functionality is needed in the new path-based storage scheme, but can be implemented in a separate PR though. When an account is deleted, then all the storage slots should be nuked out from the disk as well. In the hash-based storage scheme they are still left on disk, but in the new scheme they will be iterated and marked as deleted. But why is the NodeBlob API needed in this scenario? Because when the node is marked deleted, the previous value is also required to be recorded to construct the reverse diff. Co-authored-by: Gary Rong --- trie/iterator.go | 35 ++++++++++++++++++++++++++++- trie/iterator_test.go | 51 +++++++++++++++++++++++++++++++++++++++++++ trie/trie.go | 9 ++++++++ 3 files changed, 94 insertions(+), 1 deletion(-) diff --git a/trie/iterator.go b/trie/iterator.go index 47b1c1414b..022ebc73a9 100644 --- a/trie/iterator.go +++ b/trie/iterator.go @@ -86,7 +86,11 @@ type NodeIterator interface { // For leaf nodes, the last element of the path is the 'terminator symbol' 0x10. Path() []byte - // Leaf returns true iff the current Node is a leaf Node. + // NodeBlob returns the rlp-encoded value of the current iterated node. + // If the node is an embedded node in its parent, nil is returned then. + NodeBlob() []byte + + // Leaf returns true iff the current node is a leaf node. Leaf() bool // LeafKey returns the key of the leaf. 
The method panics if the iterator is not @@ -226,6 +230,18 @@ func (it *nodeIterator) Path() []byte { return it.path } +func (it *nodeIterator) NodeBlob() []byte { + if it.Hash() == (common.Hash{}) { + return nil // skip the non-standalone node + } + blob, err := it.resolveBlob(it.Hash().Bytes(), it.Path()) + if err != nil { + it.err = err + return nil + } + return blob +} + func (it *nodeIterator) Error() error { if it.err == errIteratorEnd { return nil @@ -364,6 +380,15 @@ func (it *nodeIterator) resolveHash(hash hashNode, path []byte) (node, error) { return resolved, err } +func (it *nodeIterator) resolveBlob(hash hashNode, path []byte) ([]byte, error) { + if it.resolver != nil { + if blob, err := it.resolver.Get(hash); err == nil && len(blob) > 0 { + return blob, nil + } + } + return it.trie.resolveBlob(hash, path) +} + func (st *nodeIteratorState) resolve(it *nodeIterator, path []byte) error { if hash, ok := st.node.(hashNode); ok { resolved, err := it.resolveHash(hash, path) @@ -552,6 +577,10 @@ func (it *differenceIterator) Path() []byte { return it.b.Path() } +func (it *differenceIterator) NodeBlob() []byte { + return it.b.NodeBlob() +} + func (it *differenceIterator) AddResolver(resolver ethdb.KeyValueReader) { panic("not implemented") } @@ -663,6 +692,10 @@ func (it *unionIterator) Path() []byte { return (*it.items)[0].Path() } +func (it *unionIterator) NodeBlob() []byte { + return (*it.items)[0].NodeBlob() +} + func (it *unionIterator) AddResolver(resolver ethdb.KeyValueReader) { panic("not implemented") } diff --git a/trie/iterator_test.go b/trie/iterator_test.go index a9cb26aede..f77ce669d1 100644 --- a/trie/iterator_test.go +++ b/trie/iterator_test.go @@ -539,3 +539,54 @@ func TestNodeIteratorLargeTrie(t *testing.T) { t.Fatalf("Too many lookups during seek, have %d want %d", have, want) } } + +func TestIteratorNodeBlob(t *testing.T) { + var ( + db = memorydb.New() + triedb = NewDatabase(db) + trie, _ = New(common.Hash{}, triedb) + ) + vals := 
[]struct{ k, v string }{ + {"do", "verb"}, + {"ether", "wookiedoo"}, + {"horse", "stallion"}, + {"shaman", "horse"}, + {"doge", "coin"}, + {"dog", "puppy"}, + {"somethingveryoddindeedthis is", "myothernodedata"}, + } + all := make(map[string]string) + for _, val := range vals { + all[val.k] = val.v + trie.Update([]byte(val.k), []byte(val.v)) + } + trie.Commit(nil) + triedb.Cap(0) + + found := make(map[common.Hash][]byte) + it := trie.NodeIterator(nil) + for it.Next(true) { + if it.Hash() == (common.Hash{}) { + continue + } + found[it.Hash()] = it.NodeBlob() + } + + dbIter := db.NewIterator(nil, nil) + defer dbIter.Release() + + var count int + for dbIter.Next() { + got, present := found[common.BytesToHash(dbIter.Key())] + if !present { + t.Fatalf("Miss trie node %v", dbIter.Key()) + } + if !bytes.Equal(got, dbIter.Value()) { + t.Fatalf("Unexpected trie node want %v got %v", dbIter.Value(), got) + } + count += 1 + } + if count != len(found) { + t.Fatal("Find extra trie node via iterator") + } +} diff --git a/trie/trie.go b/trie/trie.go index d07ebe6132..e9b4aa967c 100644 --- a/trie/trie.go +++ b/trie/trie.go @@ -669,6 +669,15 @@ func (t *Trie) resolveHash(n hashNode, prefix []byte) (node, error) { return nil, &MissingNodeError{NodeHash: hash, Path: prefix} } +func (t *Trie) resolveBlob(n hashNode, prefix []byte) ([]byte, error) { + hash := common.BytesToHash(n) + blob, _ := t.Db.Node(hash) + if len(blob) != 0 { + return blob, nil + } + return nil, &MissingNodeError{NodeHash: hash, Path: prefix} +} + // Hash returns the root hash of the trie. It does not write to the // database and can be used even if the trie doesn't have one. func (t *Trie) Hash() common.Hash {