trie, core: rework tracer and track origin value of dirty nodes (#32306)
Some checks are pending
/ Linux Build (push) Waiting to run
/ Linux Build (arm) (push) Waiting to run
/ Windows Build (push) Waiting to run
/ Docker Image (push) Waiting to run

The key changes made in this PR are highlighted below.

The trie tracer is split into two distinct structs: opTracer and prevalueTracer.
The former is specific to the MPT (Merkle Patricia Trie), while the latter is
generic and applicable to all trie implementations.

The original values of dirty nodes are tracked in a NodeSet. This serves
as the foundation for both full archive node implementations and the state live
tracer.
This commit is contained in:
rjl493456442 2025-08-11 21:55:38 +08:00 committed by GitHub
parent 55a471efaf
commit cbbf686ecc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 383 additions and 178 deletions

View file

@ -977,7 +977,7 @@ func (s *StateDB) fastDeleteStorage(snaps *snapshot.Tree, addrHash common.Hash,
storageOrigins = make(map[common.Hash][]byte) // the set for tracking the original value of slot storageOrigins = make(map[common.Hash][]byte) // the set for tracking the original value of slot
) )
stack := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { stack := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
nodes.AddNode(path, trienode.NewDeleted()) nodes.AddNode(path, trienode.NewDeletedWithPrev(blob))
}) })
for iter.Next() { for iter.Next() {
slot := common.CopyBytes(iter.Slot()) slot := common.CopyBytes(iter.Slot())
@ -1028,7 +1028,7 @@ func (s *StateDB) slowDeleteStorage(addr common.Address, addrHash common.Hash, r
if it.Hash() == (common.Hash{}) { if it.Hash() == (common.Hash{}) {
continue continue
} }
nodes.AddNode(it.Path(), trienode.NewDeleted()) nodes.AddNode(it.Path(), trienode.NewDeletedWithPrev(it.NodeBlob()))
} }
if err := it.Error(); err != nil { if err := it.Error(); err != nil {
return nil, nil, nil, err return nil, nil, nil, err
@ -1160,7 +1160,7 @@ func (s *StateDB) commit(deleteEmptyObjects bool, noStorageWiping bool) (*stateU
// //
// Given that some accounts may be destroyed and then recreated within // Given that some accounts may be destroyed and then recreated within
// the same block, it's possible that a node set with the same owner // the same block, it's possible that a node set with the same owner
// may already exists. In such cases, these two sets are combined, with // may already exist. In such cases, these two sets are combined, with
// the later one overwriting the previous one if any nodes are modified // the later one overwriting the previous one if any nodes are modified
// or deleted in both sets. // or deleted in both sets.
// //

View file

@ -29,12 +29,12 @@ import (
// insertion order. // insertion order.
type committer struct { type committer struct {
nodes *trienode.NodeSet nodes *trienode.NodeSet
tracer *tracer tracer *prevalueTracer
collectLeaf bool collectLeaf bool
} }
// newCommitter creates a new committer or picks one from the pool. // newCommitter creates a new committer or picks one from the pool.
func newCommitter(nodeset *trienode.NodeSet, tracer *tracer, collectLeaf bool) *committer { func newCommitter(nodeset *trienode.NodeSet, tracer *prevalueTracer, collectLeaf bool) *committer {
return &committer{ return &committer{
nodes: nodeset, nodes: nodeset,
tracer: tracer, tracer: tracer,
@ -110,14 +110,16 @@ func (c *committer) commitChildren(path []byte, n *fullNode, parallel bool) {
} else { } else {
wg.Add(1) wg.Add(1)
go func(index int) { go func(index int) {
defer wg.Done()
p := append(path, byte(index)) p := append(path, byte(index))
childSet := trienode.NewNodeSet(c.nodes.Owner) childSet := trienode.NewNodeSet(c.nodes.Owner)
childCommitter := newCommitter(childSet, c.tracer, c.collectLeaf) childCommitter := newCommitter(childSet, c.tracer, c.collectLeaf)
n.Children[index] = childCommitter.commit(p, child, false) n.Children[index] = childCommitter.commit(p, child, false)
nodesMu.Lock() nodesMu.Lock()
c.nodes.MergeSet(childSet) c.nodes.MergeDisjoint(childSet)
nodesMu.Unlock() nodesMu.Unlock()
wg.Done()
}(i) }(i)
} }
} }
@ -140,15 +142,15 @@ func (c *committer) store(path []byte, n node) node {
// The node is embedded in its parent, in other words, this node // The node is embedded in its parent, in other words, this node
// will not be stored in the database independently, mark it as // will not be stored in the database independently, mark it as
// deleted only if the node was existent in database before. // deleted only if the node was existent in database before.
_, ok := c.tracer.accessList[string(path)] origin := c.tracer.get(path)
if ok { if len(origin) != 0 {
c.nodes.AddNode(path, trienode.NewDeleted()) c.nodes.AddNode(path, trienode.NewDeletedWithPrev(origin))
} }
return n return n
} }
// Collect the dirty node to nodeset for return. // Collect the dirty node to nodeset for return.
nhash := common.BytesToHash(hash) nhash := common.BytesToHash(hash)
c.nodes.AddNode(path, trienode.New(nhash, nodeToBytes(n))) c.nodes.AddNode(path, trienode.NewNodeWithPrev(nhash, nodeToBytes(n), c.tracer.get(path)))
// Collect the corresponding leaf node if it's required. We don't check // Collect the corresponding leaf node if it's required. We don't check
// full node since it's impossible to store value in fullNode. The key // full node since it's impossible to store value in fullNode. The key

View file

@ -567,7 +567,12 @@ func VerifyRangeProof(rootHash common.Hash, firstKey []byte, keys [][]byte, valu
} }
// Rebuild the trie with the leaf stream, the shape of trie // Rebuild the trie with the leaf stream, the shape of trie
// should be same with the original one. // should be same with the original one.
tr := &Trie{root: root, reader: newEmptyReader(), tracer: newTracer()} tr := &Trie{
root: root,
reader: newEmptyReader(),
opTracer: newOpTracer(),
prevalueTracer: newPrevalueTracer(),
}
if empty { if empty {
tr.root = nil tr.root = nil
} }

View file

@ -18,14 +18,13 @@ package trie
import ( import (
"maps" "maps"
"slices"
"github.com/ethereum/go-ethereum/common"
) )
// tracer tracks the changes of trie nodes. During the trie operations, // opTracer tracks the changes of trie nodes. During the trie operations,
// some nodes can be deleted from the trie, while these deleted nodes // some nodes can be deleted from the trie, while these deleted nodes
// won't be captured by trie.Hasher or trie.Committer. Thus, these deleted // won't be captured by trie.Hasher or trie.Committer. Thus, these deleted
// nodes won't be removed from the disk at all. Tracer is an auxiliary tool // nodes won't be removed from the disk at all. opTracer is an auxiliary tool
// used to track all insert and delete operations of trie and capture all // used to track all insert and delete operations of trie and capture all
// deleted nodes eventually. // deleted nodes eventually.
// //
@ -35,38 +34,25 @@ import (
// This tool can track all of them no matter the node is embedded in its // This tool can track all of them no matter the node is embedded in its
// parent or not, but valueNode is never tracked. // parent or not, but valueNode is never tracked.
// //
// Besides, it's also used for recording the original value of the nodes // Note opTracer is not thread-safe, callers should be responsible for handling
// when they are resolved from the disk. The pre-value of the nodes will
// be used to construct trie history in the future.
//
// Note tracer is not thread-safe, callers should be responsible for handling
// the concurrency issues by themselves. // the concurrency issues by themselves.
type tracer struct { type opTracer struct {
inserts map[string]struct{} inserts map[string]struct{}
deletes map[string]struct{} deletes map[string]struct{}
accessList map[string][]byte
} }
// newTracer initializes the tracer for capturing trie changes. // newOpTracer initializes the tracer for capturing trie changes.
func newTracer() *tracer { func newOpTracer() *opTracer {
return &tracer{ return &opTracer{
inserts: make(map[string]struct{}), inserts: make(map[string]struct{}),
deletes: make(map[string]struct{}), deletes: make(map[string]struct{}),
accessList: make(map[string][]byte),
} }
} }
// onRead tracks the newly loaded trie node and caches the rlp-encoded
// blob internally. Don't change the value outside of function since
// it's not deep-copied.
func (t *tracer) onRead(path []byte, val []byte) {
t.accessList[string(path)] = val
}
// onInsert tracks the newly inserted trie node. If it's already // onInsert tracks the newly inserted trie node. If it's already
// in the deletion set (resurrected node), then just wipe it from // in the deletion set (resurrected node), then just wipe it from
// the deletion set as it's "untouched". // the deletion set as it's "untouched".
func (t *tracer) onInsert(path []byte) { func (t *opTracer) onInsert(path []byte) {
if _, present := t.deletes[string(path)]; present { if _, present := t.deletes[string(path)]; present {
delete(t.deletes, string(path)) delete(t.deletes, string(path))
return return
@ -77,7 +63,7 @@ func (t *tracer) onInsert(path []byte) {
// onDelete tracks the newly deleted trie node. If it's already // onDelete tracks the newly deleted trie node. If it's already
// in the addition set, then just wipe it from the addition set // in the addition set, then just wipe it from the addition set
// as it's untouched. // as it's untouched.
func (t *tracer) onDelete(path []byte) { func (t *opTracer) onDelete(path []byte) {
if _, present := t.inserts[string(path)]; present { if _, present := t.inserts[string(path)]; present {
delete(t.inserts, string(path)) delete(t.inserts, string(path))
return return
@ -86,37 +72,83 @@ func (t *tracer) onDelete(path []byte) {
} }
// reset clears the content tracked by tracer. // reset clears the content tracked by tracer.
func (t *tracer) reset() { func (t *opTracer) reset() {
t.inserts = make(map[string]struct{}) clear(t.inserts)
t.deletes = make(map[string]struct{}) clear(t.deletes)
t.accessList = make(map[string][]byte)
} }
// copy returns a deep copied tracer instance. // copy returns a deep copied tracer instance.
func (t *tracer) copy() *tracer { func (t *opTracer) copy() *opTracer {
accessList := make(map[string][]byte, len(t.accessList)) return &opTracer{
for path, blob := range t.accessList { inserts: maps.Clone(t.inserts),
accessList[path] = common.CopyBytes(blob) deletes: maps.Clone(t.deletes),
}
return &tracer{
inserts: maps.Clone(t.inserts),
deletes: maps.Clone(t.deletes),
accessList: accessList,
} }
} }
// deletedNodes returns a list of node paths which are deleted from the trie. // deletedList returns a list of node paths which are deleted from the trie.
func (t *tracer) deletedNodes() []string { func (t *opTracer) deletedList() [][]byte {
var paths []string paths := make([][]byte, 0, len(t.deletes))
for path := range t.deletes { for path := range t.deletes {
// It's possible a few deleted nodes were embedded paths = append(paths, []byte(path))
// in their parent before, the deletions can be no
// effect by deleting nothing, filter them out.
_, ok := t.accessList[path]
if !ok {
continue
}
paths = append(paths, path)
} }
return paths return paths
} }
// prevalueTracer tracks the original values of resolved trie nodes. Cached trie
// node values are expected to be immutable. A zero-size node value is treated as
// non-existent and should not occur in practice.
//
// Note prevalueTracer is not thread-safe, callers should be responsible for
// handling the concurrency issues by themselves.
type prevalueTracer struct {
data map[string][]byte
}
// newPrevalueTracer initializes the tracer for capturing resolved trie nodes.
func newPrevalueTracer() *prevalueTracer {
return &prevalueTracer{
data: make(map[string][]byte),
}
}
// put tracks the newly loaded trie node and caches its RLP-encoded
// blob internally. Do not modify the value outside this function,
// as it is not deep-copied.
func (t *prevalueTracer) put(path []byte, val []byte) {
t.data[string(path)] = val
}
// get returns the cached trie node value. If the node is not found, nil will
// be returned.
func (t *prevalueTracer) get(path []byte) []byte {
return t.data[string(path)]
}
// hasList returns a list of flags indicating whether the corresponding trie nodes
// specified by the path exist in the trie.
func (t *prevalueTracer) hasList(list [][]byte) []bool {
exists := make([]bool, 0, len(list))
for _, path := range list {
_, ok := t.data[string(path)]
exists = append(exists, ok)
}
return exists
}
// values returns a list of values of the cached trie nodes.
func (t *prevalueTracer) values() [][]byte {
return slices.Collect(maps.Values(t.data))
}
// reset resets the cached content in the prevalueTracer.
func (t *prevalueTracer) reset() {
clear(t.data)
}
// copy returns a copied prevalueTracer instance.
func (t *prevalueTracer) copy() *prevalueTracer {
// Shadow clone is used, as the cached trie node values are immutable
return &prevalueTracer{
data: maps.Clone(t.data),
}
}

View file

@ -52,15 +52,15 @@ var (
} }
) )
func TestTrieTracer(t *testing.T) { func TestTrieOpTracer(t *testing.T) {
testTrieTracer(t, tiny) testTrieOpTracer(t, tiny)
testTrieTracer(t, nonAligned) testTrieOpTracer(t, nonAligned)
testTrieTracer(t, standard) testTrieOpTracer(t, standard)
} }
// Tests if the trie diffs are tracked correctly. Tracer should capture // Tests if the trie diffs are tracked correctly. Tracer should capture
// all non-leaf dirty nodes, no matter the node is embedded or not. // all non-leaf dirty nodes, no matter the node is embedded or not.
func testTrieTracer(t *testing.T, vals []struct{ k, v string }) { func testTrieOpTracer(t *testing.T, vals []struct{ k, v string }) {
db := newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme) db := newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme)
trie := NewEmpty(db) trie := NewEmpty(db)
@ -68,8 +68,9 @@ func testTrieTracer(t *testing.T, vals []struct{ k, v string }) {
for _, val := range vals { for _, val := range vals {
trie.MustUpdate([]byte(val.k), []byte(val.v)) trie.MustUpdate([]byte(val.k), []byte(val.v))
} }
insertSet := copySet(trie.tracer.inserts) // copy before commit insertSet := copySet(trie.opTracer.inserts) // copy before commit
deleteSet := copySet(trie.tracer.deletes) // copy before commit deleteSet := copySet(trie.opTracer.deletes) // copy before commit
root, nodes := trie.Commit(false) root, nodes := trie.Commit(false)
db.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) db.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes))
@ -86,7 +87,7 @@ func testTrieTracer(t *testing.T, vals []struct{ k, v string }) {
for _, val := range vals { for _, val := range vals {
trie.MustDelete([]byte(val.k)) trie.MustDelete([]byte(val.k))
} }
insertSet, deleteSet = copySet(trie.tracer.inserts), copySet(trie.tracer.deletes) insertSet, deleteSet = copySet(trie.opTracer.inserts), copySet(trie.opTracer.deletes)
if !compareSet(insertSet, nil) { if !compareSet(insertSet, nil) {
t.Fatal("Unexpected insertion set") t.Fatal("Unexpected insertion set")
} }
@ -97,13 +98,13 @@ func testTrieTracer(t *testing.T, vals []struct{ k, v string }) {
// Test that after inserting a new batch of nodes and deleting them immediately, // Test that after inserting a new batch of nodes and deleting them immediately,
// the trie tracer should be cleared normally as no operation happened. // the trie tracer should be cleared normally as no operation happened.
func TestTrieTracerNoop(t *testing.T) { func TestTrieOpTracerNoop(t *testing.T) {
testTrieTracerNoop(t, tiny) testTrieOpTracerNoop(t, tiny)
testTrieTracerNoop(t, nonAligned) testTrieOpTracerNoop(t, nonAligned)
testTrieTracerNoop(t, standard) testTrieOpTracerNoop(t, standard)
} }
func testTrieTracerNoop(t *testing.T, vals []struct{ k, v string }) { func testTrieOpTracerNoop(t *testing.T, vals []struct{ k, v string }) {
db := newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme) db := newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme)
trie := NewEmpty(db) trie := NewEmpty(db)
for _, val := range vals { for _, val := range vals {
@ -112,22 +113,22 @@ func testTrieTracerNoop(t *testing.T, vals []struct{ k, v string }) {
for _, val := range vals { for _, val := range vals {
trie.MustDelete([]byte(val.k)) trie.MustDelete([]byte(val.k))
} }
if len(trie.tracer.inserts) != 0 { if len(trie.opTracer.inserts) != 0 {
t.Fatal("Unexpected insertion set") t.Fatal("Unexpected insertion set")
} }
if len(trie.tracer.deletes) != 0 { if len(trie.opTracer.deletes) != 0 {
t.Fatal("Unexpected deletion set") t.Fatal("Unexpected deletion set")
} }
} }
// Tests if the accessList is correctly tracked. // Tests if the original value of trie nodes are correctly tracked.
func TestAccessList(t *testing.T) { func TestPrevalueTracer(t *testing.T) {
testAccessList(t, tiny) testPrevalueTracer(t, tiny)
testAccessList(t, nonAligned) testPrevalueTracer(t, nonAligned)
testAccessList(t, standard) testPrevalueTracer(t, standard)
} }
func testAccessList(t *testing.T, vals []struct{ k, v string }) { func testPrevalueTracer(t *testing.T, vals []struct{ k, v string }) {
var ( var (
db = newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme) db = newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme)
trie = NewEmpty(db) trie = NewEmpty(db)
@ -210,7 +211,7 @@ func testAccessList(t *testing.T, vals []struct{ k, v string }) {
} }
// Tests origin values won't be tracked in Iterator or Prover // Tests origin values won't be tracked in Iterator or Prover
func TestAccessListLeak(t *testing.T) { func TestPrevalueTracerLeak(t *testing.T) {
var ( var (
db = newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme) db = newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme)
trie = NewEmpty(db) trie = NewEmpty(db)
@ -249,9 +250,9 @@ func TestAccessListLeak(t *testing.T) {
} }
for _, c := range cases { for _, c := range cases {
trie, _ = New(TrieID(root), db) trie, _ = New(TrieID(root), db)
n1 := len(trie.tracer.accessList) n1 := len(trie.prevalueTracer.data)
c.op(trie) c.op(trie)
n2 := len(trie.tracer.accessList) n2 := len(trie.prevalueTracer.data)
if n1 != n2 { if n1 != n2 {
t.Fatalf("AccessList is leaked, prev %d after %d", n1, n2) t.Fatalf("AccessList is leaked, prev %d after %d", n1, n2)

View file

@ -55,8 +55,9 @@ type Trie struct {
// reader is the handler trie can retrieve nodes from. // reader is the handler trie can retrieve nodes from.
reader *trieReader reader *trieReader
// tracer is the tool to track the trie changes. // Various tracers for capturing the modifications to trie
tracer *tracer opTracer *opTracer
prevalueTracer *prevalueTracer
} }
// newFlag returns the cache flag value for a newly created node. // newFlag returns the cache flag value for a newly created node.
@ -67,13 +68,14 @@ func (t *Trie) newFlag() nodeFlag {
// Copy returns a copy of Trie. // Copy returns a copy of Trie.
func (t *Trie) Copy() *Trie { func (t *Trie) Copy() *Trie {
return &Trie{ return &Trie{
root: copyNode(t.root), root: copyNode(t.root),
owner: t.owner, owner: t.owner,
committed: t.committed, committed: t.committed,
unhashed: t.unhashed, unhashed: t.unhashed,
uncommitted: t.uncommitted, uncommitted: t.uncommitted,
reader: t.reader, reader: t.reader,
tracer: t.tracer.copy(), opTracer: t.opTracer.copy(),
prevalueTracer: t.prevalueTracer.copy(),
} }
} }
@ -89,9 +91,10 @@ func New(id *ID, db database.NodeDatabase) (*Trie, error) {
return nil, err return nil, err
} }
trie := &Trie{ trie := &Trie{
owner: id.Owner, owner: id.Owner,
reader: reader, reader: reader,
tracer: newTracer(), opTracer: newOpTracer(),
prevalueTracer: newPrevalueTracer(),
} }
if id.Root != (common.Hash{}) && id.Root != types.EmptyRootHash { if id.Root != (common.Hash{}) && id.Root != types.EmptyRootHash {
rootnode, err := trie.resolveAndTrack(id.Root[:], nil) rootnode, err := trie.resolveAndTrack(id.Root[:], nil)
@ -361,7 +364,7 @@ func (t *Trie) insert(n node, prefix, key []byte, value node) (bool, node, error
// New branch node is created as a child of the original short node. // New branch node is created as a child of the original short node.
// Track the newly inserted node in the tracer. The node identifier // Track the newly inserted node in the tracer. The node identifier
// passed is the path from the root node. // passed is the path from the root node.
t.tracer.onInsert(append(prefix, key[:matchlen]...)) t.opTracer.onInsert(append(prefix, key[:matchlen]...))
// Replace it with a short node leading up to the branch. // Replace it with a short node leading up to the branch.
return true, &shortNode{key[:matchlen], branch, t.newFlag()}, nil return true, &shortNode{key[:matchlen], branch, t.newFlag()}, nil
@ -379,7 +382,7 @@ func (t *Trie) insert(n node, prefix, key []byte, value node) (bool, node, error
// New short node is created and track it in the tracer. The node identifier // New short node is created and track it in the tracer. The node identifier
// passed is the path from the root node. Note the valueNode won't be tracked // passed is the path from the root node. Note the valueNode won't be tracked
// since it's always embedded in its parent. // since it's always embedded in its parent.
t.tracer.onInsert(prefix) t.opTracer.onInsert(prefix)
return true, &shortNode{key, value, t.newFlag()}, nil return true, &shortNode{key, value, t.newFlag()}, nil
@ -444,7 +447,7 @@ func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) {
// The matched short node is deleted entirely and track // The matched short node is deleted entirely and track
// it in the deletion set. The same the valueNode doesn't // it in the deletion set. The same the valueNode doesn't
// need to be tracked at all since it's always embedded. // need to be tracked at all since it's always embedded.
t.tracer.onDelete(prefix) t.opTracer.onDelete(prefix)
return true, nil, nil // remove n entirely for whole matches return true, nil, nil // remove n entirely for whole matches
} }
@ -460,7 +463,7 @@ func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) {
case *shortNode: case *shortNode:
// The child shortNode is merged into its parent, track // The child shortNode is merged into its parent, track
// is deleted as well. // is deleted as well.
t.tracer.onDelete(append(prefix, n.Key...)) t.opTracer.onDelete(append(prefix, n.Key...))
// Deleting from the subtrie reduced it to another // Deleting from the subtrie reduced it to another
// short node. Merge the nodes to avoid creating a // short node. Merge the nodes to avoid creating a
@ -525,7 +528,7 @@ func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) {
// Replace the entire full node with the short node. // Replace the entire full node with the short node.
// Mark the original short node as deleted since the // Mark the original short node as deleted since the
// value is embedded into the parent now. // value is embedded into the parent now.
t.tracer.onDelete(append(prefix, byte(pos))) t.opTracer.onDelete(append(prefix, byte(pos)))
k := append([]byte{byte(pos)}, cnode.Key...) k := append([]byte{byte(pos)}, cnode.Key...)
return true, &shortNode{k, cnode.Val, t.newFlag()}, nil return true, &shortNode{k, cnode.Val, t.newFlag()}, nil
@ -616,13 +619,31 @@ func (t *Trie) resolveAndTrack(n hashNode, prefix []byte) (node, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
t.tracer.onRead(prefix, blob) t.prevalueTracer.put(prefix, blob)
// The returned node blob won't be changed afterward. No need to // The returned node blob won't be changed afterward. No need to
// deep-copy the slice. // deep-copy the slice.
return decodeNodeUnsafe(n, blob) return decodeNodeUnsafe(n, blob)
} }
// deletedNodes returns a list of node paths, referring the nodes being deleted
// from the trie. It's possible a few deleted nodes were embedded in their parent
// before, the deletions can be no effect by deleting nothing, filter them out.
func (t *Trie) deletedNodes() [][]byte {
var (
pos int
list = t.opTracer.deletedList()
flags = t.prevalueTracer.hasList(list)
)
for i := 0; i < len(list); i++ {
if flags[i] {
list[pos] = list[i]
pos++
}
}
return list[:pos] // trim to the new length
}
// Hash returns the root hash of the trie. It does not write to the // Hash returns the root hash of the trie. It does not write to the
// database and can be used even if the trie doesn't have one. // database and can be used even if the trie doesn't have one.
func (t *Trie) Hash() common.Hash { func (t *Trie) Hash() common.Hash {
@ -644,13 +665,13 @@ func (t *Trie) Commit(collectLeaf bool) (common.Hash, *trienode.NodeSet) {
// (b) The trie was non-empty and all nodes are dropped => return // (b) The trie was non-empty and all nodes are dropped => return
// the node set includes all deleted nodes // the node set includes all deleted nodes
if t.root == nil { if t.root == nil {
paths := t.tracer.deletedNodes() paths := t.deletedNodes()
if len(paths) == 0 { if len(paths) == 0 {
return types.EmptyRootHash, nil // case (a) return types.EmptyRootHash, nil // case (a)
} }
nodes := trienode.NewNodeSet(t.owner) nodes := trienode.NewNodeSet(t.owner)
for _, path := range paths { for _, path := range paths {
nodes.AddNode([]byte(path), trienode.NewDeleted()) nodes.AddNode(path, trienode.NewDeletedWithPrev(t.prevalueTracer.get(path)))
} }
return types.EmptyRootHash, nodes // case (b) return types.EmptyRootHash, nodes // case (b)
} }
@ -667,11 +688,11 @@ func (t *Trie) Commit(collectLeaf bool) (common.Hash, *trienode.NodeSet) {
return rootHash, nil return rootHash, nil
} }
nodes := trienode.NewNodeSet(t.owner) nodes := trienode.NewNodeSet(t.owner)
for _, path := range t.tracer.deletedNodes() { for _, path := range t.deletedNodes() {
nodes.AddNode([]byte(path), trienode.NewDeleted()) nodes.AddNode(path, trienode.NewDeletedWithPrev(t.prevalueTracer.get(path)))
} }
// If the number of changes is below 100, we let one thread handle it // If the number of changes is below 100, we let one thread handle it
t.root = newCommitter(nodes, t.tracer, collectLeaf).Commit(t.root, t.uncommitted > 100) t.root = newCommitter(nodes, t.prevalueTracer, collectLeaf).Commit(t.root, t.uncommitted > 100)
t.uncommitted = 0 t.uncommitted = 0
return rootHash, nodes return rootHash, nodes
} }
@ -692,12 +713,13 @@ func (t *Trie) hashRoot() []byte {
// Witness returns a set containing all trie nodes that have been accessed. // Witness returns a set containing all trie nodes that have been accessed.
func (t *Trie) Witness() map[string]struct{} { func (t *Trie) Witness() map[string]struct{} {
if len(t.tracer.accessList) == 0 { values := t.prevalueTracer.values()
if len(values) == 0 {
return nil return nil
} }
witness := make(map[string]struct{}, len(t.tracer.accessList)) witness := make(map[string]struct{}, len(values))
for _, node := range t.tracer.accessList { for _, val := range values {
witness[string(node)] = struct{}{} witness[string(val)] = struct{}{}
} }
return witness return witness
} }
@ -708,6 +730,7 @@ func (t *Trie) Reset() {
t.owner = common.Hash{} t.owner = common.Hash{}
t.unhashed = 0 t.unhashed = 0
t.uncommitted = 0 t.uncommitted = 0
t.tracer.reset() t.opTracer.reset()
t.prevalueTracer.reset()
t.committed = false t.committed = false
} }

View file

@ -449,35 +449,35 @@ func verifyAccessList(old *Trie, new *Trie, set *trienode.NodeSet) error {
if !ok || n.IsDeleted() { if !ok || n.IsDeleted() {
return errors.New("expect new node") return errors.New("expect new node")
} }
//if len(n.Prev) > 0 { if len(set.Origins[path]) > 0 {
// return errors.New("unexpected origin value") return errors.New("unexpected origin value")
//} }
} }
// Check deletion set // Check deletion set
for path := range deletes { for path, blob := range deletes {
n, ok := set.Nodes[path] n, ok := set.Nodes[path]
if !ok || !n.IsDeleted() { if !ok || !n.IsDeleted() {
return errors.New("expect deleted node") return errors.New("expect deleted node")
} }
//if len(n.Prev) == 0 { if len(set.Origins[path]) == 0 {
// return errors.New("expect origin value") return errors.New("expect origin value")
//} }
//if !bytes.Equal(n.Prev, blob) { if !bytes.Equal(set.Origins[path], blob) {
// return errors.New("invalid origin value") return errors.New("invalid origin value")
//} }
} }
// Check update set // Check update set
for path := range updates { for path, blob := range updates {
n, ok := set.Nodes[path] n, ok := set.Nodes[path]
if !ok || n.IsDeleted() { if !ok || n.IsDeleted() {
return errors.New("expect updated node") return errors.New("expect updated node")
} }
//if len(n.Prev) == 0 { if len(set.Origins[path]) == 0 {
// return errors.New("expect origin value") return errors.New("expect origin value")
//} }
//if !bytes.Equal(n.Prev, blob) { if !bytes.Equal(set.Origins[path], blob) {
// return errors.New("invalid origin value") return errors.New("invalid origin value")
//} }
} }
return nil return nil
} }
@ -595,18 +595,18 @@ func runRandTest(rt randTest) error {
deleteExp[path] = struct{}{} deleteExp[path] = struct{}{}
} }
} }
if len(insertExp) != len(tr.tracer.inserts) { if len(insertExp) != len(tr.opTracer.inserts) {
rt[i].err = errors.New("insert set mismatch") rt[i].err = errors.New("insert set mismatch")
} }
if len(deleteExp) != len(tr.tracer.deletes) { if len(deleteExp) != len(tr.opTracer.deletes) {
rt[i].err = errors.New("delete set mismatch") rt[i].err = errors.New("delete set mismatch")
} }
for insert := range tr.tracer.inserts { for insert := range tr.opTracer.inserts {
if _, present := insertExp[insert]; !present { if _, present := insertExp[insert]; !present {
rt[i].err = errors.New("missing inserted node") rt[i].err = errors.New("missing inserted node")
} }
} }
for del := range tr.tracer.deletes { for del := range tr.opTracer.deletes {
if _, present := deleteExp[del]; !present { if _, present := deleteExp[del]; !present {
rt[i].err = errors.New("missing deleted node") rt[i].err = errors.New("missing deleted node")
} }

View file

@ -51,6 +51,35 @@ func New(hash common.Hash, blob []byte) *Node {
// NewDeleted constructs a node which is deleted. // NewDeleted constructs a node which is deleted.
func NewDeleted() *Node { return New(common.Hash{}, nil) } func NewDeleted() *Node { return New(common.Hash{}, nil) }
// NodeWithPrev is a wrapper over Node by tracking the original value of node.
type NodeWithPrev struct {
*Node
Prev []byte // Nil means the node was not existent
}
// NewNodeWithPrev constructs a node with the additional original value.
func NewNodeWithPrev(hash common.Hash, blob []byte, prev []byte) *NodeWithPrev {
return &NodeWithPrev{
Node: &Node{
Hash: hash,
Blob: blob,
},
Prev: prev,
}
}
// NewDeletedWithPrev constructs a node which is deleted with the additional
// original value.
func NewDeletedWithPrev(prev []byte) *NodeWithPrev {
return &NodeWithPrev{
Node: &Node{
Hash: common.Hash{},
Blob: nil,
},
Prev: prev,
}
}
// leaf represents a trie leaf node // leaf represents a trie leaf node
type leaf struct { type leaf struct {
Blob []byte // raw blob of leaf Blob []byte // raw blob of leaf
@ -63,6 +92,8 @@ type NodeSet struct {
Owner common.Hash Owner common.Hash
Leaves []*leaf Leaves []*leaf
Nodes map[string]*Node Nodes map[string]*Node
Origins map[string][]byte
updates int // the count of updated and inserted nodes updates int // the count of updated and inserted nodes
deletes int // the count of deleted nodes deletes int // the count of deleted nodes
} }
@ -71,8 +102,9 @@ type NodeSet struct {
// the owning account address hash for storage tries. // the owning account address hash for storage tries.
func NewNodeSet(owner common.Hash) *NodeSet { func NewNodeSet(owner common.Hash) *NodeSet {
return &NodeSet{ return &NodeSet{
Owner: owner, Owner: owner,
Nodes: make(map[string]*Node), Nodes: make(map[string]*Node),
Origins: make(map[string][]byte),
} }
} }
@ -91,22 +123,25 @@ func (set *NodeSet) ForEachWithOrder(callback func(path string, n *Node)) {
} }
// AddNode adds the provided node into set. // AddNode adds the provided node into set.
func (set *NodeSet) AddNode(path []byte, n *Node) { func (set *NodeSet) AddNode(path []byte, n *NodeWithPrev) {
if n.IsDeleted() { if n.IsDeleted() {
set.deletes += 1 set.deletes += 1
} else { } else {
set.updates += 1 set.updates += 1
} }
set.Nodes[string(path)] = n key := string(path)
set.Nodes[key] = n.Node
set.Origins[key] = n.Prev
} }
// MergeSet merges this 'set' with 'other'. It assumes that the sets are disjoint, // MergeDisjoint merges this 'set' with 'other'. It assumes that the sets are disjoint,
// and thus does not deduplicate data (count deletes, dedup leaves etc). // and thus does not deduplicate data (count deletes, dedup leaves etc).
func (set *NodeSet) MergeSet(other *NodeSet) error { func (set *NodeSet) MergeDisjoint(other *NodeSet) error {
if set.Owner != other.Owner { if set.Owner != other.Owner {
return fmt.Errorf("nodesets belong to different owner are not mergeable %x-%x", set.Owner, other.Owner) return fmt.Errorf("nodesets belong to different owner are not mergeable %x-%x", set.Owner, other.Owner)
} }
maps.Copy(set.Nodes, other.Nodes) maps.Copy(set.Nodes, other.Nodes)
maps.Copy(set.Origins, other.Origins)
set.deletes += other.deletes set.deletes += other.deletes
set.updates += other.updates set.updates += other.updates
@ -117,12 +152,13 @@ func (set *NodeSet) MergeSet(other *NodeSet) error {
return nil return nil
} }
// Merge adds a set of nodes into the set. // Merge adds a set of nodes to the current set. It assumes the sets may overlap,
func (set *NodeSet) Merge(owner common.Hash, nodes map[string]*Node) error { // so deduplication is performed.
if set.Owner != owner { func (set *NodeSet) Merge(other *NodeSet) error {
return fmt.Errorf("nodesets belong to different owner are not mergeable %x-%x", set.Owner, owner) if set.Owner != other.Owner {
return fmt.Errorf("nodesets belong to different owner are not mergeable %x-%x", set.Owner, other.Owner)
} }
for path, node := range nodes { for path, node := range other.Nodes {
prev, ok := set.Nodes[path] prev, ok := set.Nodes[path]
if ok { if ok {
// overwrite happens, revoke the counter // overwrite happens, revoke the counter
@ -137,8 +173,17 @@ func (set *NodeSet) Merge(owner common.Hash, nodes map[string]*Node) error {
} else { } else {
set.updates += 1 set.updates += 1
} }
set.Nodes[path] = node set.Nodes[path] = node // overwrite the node with new value
// Add the original value only if it was previously non-existent.
// If multiple mutations are made to the same node, the first one
// is considered the true original value.
if _, exist := set.Origins[path]; !exist {
set.Origins[path] = other.Origins[path]
}
} }
// TODO leaves are not aggregated, as they are not used in storage tries.
// TODO(rjl493456442) deprecate the leaves along with the legacy hash mode.
return nil return nil
} }
@ -169,11 +214,16 @@ func (set *NodeSet) Summary() string {
for path, n := range set.Nodes { for path, n := range set.Nodes {
// Deletion // Deletion
if n.IsDeleted() { if n.IsDeleted() {
fmt.Fprintf(out, " [-]: %x\n", path) fmt.Fprintf(out, " [-]: %x prev: %x\n", path, set.Origins[path])
continue continue
} }
// Insertion or update // Insertion
fmt.Fprintf(out, " [+/*]: %x -> %v \n", path, n.Hash) if len(set.Origins[path]) == 0 {
fmt.Fprintf(out, " [+]: %x -> %v\n", path, n.Hash)
continue
}
// Update
fmt.Fprintf(out, " [*]: %x -> %v prev: %x\n", path, n.Hash, set.Origins[path])
} }
for _, n := range set.Leaves { for _, n := range set.Leaves {
fmt.Fprintf(out, "[leaf]: %v\n", n) fmt.Fprintf(out, "[leaf]: %v\n", n)
@ -203,7 +253,7 @@ func NewWithNodeSet(set *NodeSet) *MergedNodeSet {
func (set *MergedNodeSet) Merge(other *NodeSet) error { func (set *MergedNodeSet) Merge(other *NodeSet) error {
subset, present := set.Sets[other.Owner] subset, present := set.Sets[other.Owner]
if present { if present {
return subset.Merge(other.Owner, other.Nodes) return subset.Merge(other)
} }
set.Sets[other.Owner] = other set.Sets[other.Owner] = other
return nil return nil

View file

@ -17,13 +17,100 @@
package trienode package trienode
import ( import (
"bytes"
"crypto/rand" "crypto/rand"
"maps"
"reflect"
"slices"
"testing" "testing"
"github.com/davecgh/go-spew/spew"
"github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/internal/testrand"
) )
// makeTestSet constructs a node set with n random entries — roughly three
// quarters live nodes and one quarter deletions — plus the supplied shared
// paths, which alternate between deletion and update markers so that two
// sets built with the same paths overlap.
func makeTestSet(owner common.Hash, n int, paths [][]byte) *NodeSet {
	set := NewNodeSet(owner)
	for i := 0; i < n*3/4; i++ {
		blob := testrand.Bytes(100)
		set.AddNode(testrand.Bytes(10), NewNodeWithPrev(crypto.Keccak256Hash(blob), blob, testrand.Bytes(100)))
	}
	for i := 0; i < n/4; i++ {
		set.AddNode(testrand.Bytes(10), NewDeletedWithPrev(testrand.Bytes(100)))
	}
	for i, path := range paths {
		if i%3 == 0 {
			set.AddNode(path, NewDeletedWithPrev(testrand.Bytes(100)))
			continue
		}
		blob := testrand.Bytes(100)
		set.AddNode(path, NewNodeWithPrev(crypto.Keccak256Hash(blob), blob, testrand.Bytes(100)))
	}
	return set
}
// copyNodeSet returns a shallow-cloned snapshot of set: the maps and the
// leaf slice are cloned, while the referenced node values are shared.
func copyNodeSet(set *NodeSet) *NodeSet {
	return &NodeSet{
		Owner:   set.Owner,
		Leaves:  slices.Clone(set.Leaves),
		Nodes:   maps.Clone(set.Nodes),
		Origins: maps.Clone(set.Origins),
		updates: set.updates,
		deletes: set.deletes,
	}
}
// TestNodeSetMerge verifies that merging two overlapping node sets keeps
// the latest node content for each path while preserving the earliest
// recorded origin value for paths present in both sets.
func TestNodeSetMerge(t *testing.T) {
	var shared [][]byte
	for i := 0; i < 2; i++ {
		shared = append(shared, testrand.Bytes(10))
	}
	owner := testrand.Hash()
	setA := makeTestSet(owner, 20, shared)
	cpyA := copyNodeSet(setA) // snapshot of setA before it is mutated by Merge
	setB := makeTestSet(owner, 20, shared)

	// Merge reports an error for mismatched owners; fail loudly instead of
	// silently dropping it, otherwise the checks below would be meaningless.
	if err := setA.Merge(setB); err != nil {
		t.Fatalf("Failed to merge node sets: %v", err)
	}
	for path, node := range setA.Nodes {
		nA, inA := cpyA.Nodes[path]
		nB, inB := setB.Nodes[path]
		switch {
		case inA && inB:
			// Overlapping path: node content from B wins, origin from A sticks.
			origin := setA.Origins[path]
			if !bytes.Equal(origin, cpyA.Origins[path]) {
				t.Errorf("Unexpected origin, path %v: want: %v, got: %v", []byte(path), cpyA.Origins[path], origin)
			}
			if !reflect.DeepEqual(node, nB) {
				t.Errorf("Unexpected node, path %v: want: %v, got: %v", []byte(path), spew.Sdump(nB), spew.Sdump(node))
			}
		case !inA && inB:
			// Path only in B: both node and origin are taken from B.
			origin := setA.Origins[path]
			if !bytes.Equal(origin, setB.Origins[path]) {
				t.Errorf("Unexpected origin, path %v: want: %v, got: %v", []byte(path), setB.Origins[path], origin)
			}
			if !reflect.DeepEqual(node, nB) {
				t.Errorf("Unexpected node, path %v: want: %v, got: %v", []byte(path), spew.Sdump(nB), spew.Sdump(node))
			}
		case inA && !inB:
			// Path only in A: must be untouched by the merge.
			origin := setA.Origins[path]
			if !bytes.Equal(origin, cpyA.Origins[path]) {
				t.Errorf("Unexpected origin, path %v: want: %v, got: %v", []byte(path), cpyA.Origins[path], origin)
			}
			if !reflect.DeepEqual(node, nA) {
				t.Errorf("Unexpected node, path %v: want: %v, got: %v", []byte(path), spew.Sdump(nA), spew.Sdump(node))
			}
		default:
			// A path in the merged set must originate from at least one input.
			t.Errorf("Unexpected node, %v", []byte(path))
		}
	}
}
func BenchmarkMerge(b *testing.B) { func BenchmarkMerge(b *testing.B) {
b.Run("1K", func(b *testing.B) { b.Run("1K", func(b *testing.B) {
benchmarkMerge(b, 1000) benchmarkMerge(b, 1000)
@ -42,7 +129,7 @@ func benchmarkMerge(b *testing.B, count int) {
blob := make([]byte, 32) blob := make([]byte, 32)
rand.Read(blob) rand.Read(blob)
hash := crypto.Keccak256Hash(blob) hash := crypto.Keccak256Hash(blob)
s.AddNode(path, New(hash, blob)) s.AddNode(path, NewNodeWithPrev(hash, blob, nil))
} }
for i := 0; i < count; i++ { for i := 0; i < count; i++ {
// Random path of 4 nibbles // Random path of 4 nibbles
@ -53,9 +140,9 @@ func benchmarkMerge(b *testing.B, count int) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
// Store set x into a backup // Store set x into a backup
z := NewNodeSet(common.Hash{}) z := NewNodeSet(common.Hash{})
z.Merge(common.Hash{}, x.Nodes) z.Merge(x)
// Merge y into x // Merge y into x
x.Merge(common.Hash{}, y.Nodes) x.Merge(y)
x = z x = z
} }
} }

View file

@ -42,6 +42,7 @@ type VerkleTrie struct {
root verkle.VerkleNode root verkle.VerkleNode
cache *utils.PointCache cache *utils.PointCache
reader *trieReader reader *trieReader
tracer *prevalueTracer
} }
// NewVerkleTrie constructs a verkle tree based on the specified root hash. // NewVerkleTrie constructs a verkle tree based on the specified root hash.
@ -50,27 +51,25 @@ func NewVerkleTrie(root common.Hash, db database.NodeDatabase, cache *utils.Poin
if err != nil { if err != nil {
return nil, err return nil, err
} }
// Parse the root verkle node if it's not empty. t := &VerkleTrie{
node := verkle.New() root: verkle.New(),
if root != types.EmptyVerkleHash && root != types.EmptyRootHash {
blob, err := reader.node(nil, common.Hash{})
if err != nil {
return nil, err
}
node, err = verkle.ParseNode(blob, 0)
if err != nil {
return nil, err
}
}
return &VerkleTrie{
root: node,
cache: cache, cache: cache,
reader: reader, reader: reader,
}, nil tracer: newPrevalueTracer(),
} }
// Parse the root verkle node if it's not empty.
func (t *VerkleTrie) FlatdbNodeResolver(path []byte) ([]byte, error) { if root != types.EmptyVerkleHash && root != types.EmptyRootHash {
return t.reader.node(path, common.Hash{}) blob, err := t.nodeResolver(nil)
if err != nil {
return nil, err
}
node, err := verkle.ParseNode(blob, 0)
if err != nil {
return nil, err
}
t.root = node
}
return t, nil
} }
// GetKey returns the sha3 preimage of a hashed key that was previously used // GetKey returns the sha3 preimage of a hashed key that was previously used
@ -268,7 +267,7 @@ func (t *VerkleTrie) Commit(_ bool) (common.Hash, *trienode.NodeSet) {
nodeset := trienode.NewNodeSet(common.Hash{}) nodeset := trienode.NewNodeSet(common.Hash{})
for _, node := range nodes { for _, node := range nodes {
// Hash parameter is not used in pathdb // Hash parameter is not used in pathdb
nodeset.AddNode(node.Path, trienode.New(common.Hash{}, node.SerializedBytes)) nodeset.AddNode(node.Path, trienode.NewNodeWithPrev(common.Hash{}, node.SerializedBytes, t.tracer.get(node.Path)))
} }
// Serialize root commitment form // Serialize root commitment form
return t.Hash(), nodeset return t.Hash(), nodeset
@ -301,6 +300,7 @@ func (t *VerkleTrie) Copy() *VerkleTrie {
root: t.root.Copy(), root: t.root.Copy(),
cache: t.cache, cache: t.cache,
reader: t.reader, reader: t.reader,
tracer: t.tracer.copy(),
} }
} }
@ -317,7 +317,7 @@ func (t *VerkleTrie) Proof(posttrie *VerkleTrie, keys [][]byte) (*verkle.VerkleP
if posttrie != nil { if posttrie != nil {
postroot = posttrie.root postroot = posttrie.root
} }
proof, _, _, _, err := verkle.MakeVerkleMultiProof(t.root, postroot, keys, t.FlatdbNodeResolver) proof, _, _, _, err := verkle.MakeVerkleMultiProof(t.root, postroot, keys, t.nodeResolver)
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
} }
@ -421,7 +421,12 @@ func (t *VerkleTrie) ToDot() string {
} }
func (t *VerkleTrie) nodeResolver(path []byte) ([]byte, error) { func (t *VerkleTrie) nodeResolver(path []byte) ([]byte, error) {
return t.reader.node(path, common.Hash{}) blob, err := t.reader.node(path, common.Hash{})
if err != nil {
return nil, err
}
t.tracer.put(path, blob)
return blob, nil
} }
// Witness returns a set containing all trie nodes that have been accessed. // Witness returns a set containing all trie nodes that have been accessed.