trie, core: rework tracer and track origin value of dirty nodes (#32306)
Some checks are pending
/ Linux Build (push) Waiting to run
/ Linux Build (arm) (push) Waiting to run
/ Windows Build (push) Waiting to run
/ Docker Image (push) Waiting to run

The changes made in this PR are highlighted here.

The trie tracer is split into two distinct structs: opTracer and prevalueTracer. 
The former is specific to MPT, while the latter is generic and applicable to all
trie implementations.

The original values of dirty nodes are tracked in a NodeSet. This serves
as the foundation for both full archive node implementations and the state live
tracer.
This commit is contained in:
rjl493456442 2025-08-11 21:55:38 +08:00 committed by GitHub
parent 55a471efaf
commit cbbf686ecc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 383 additions and 178 deletions

View file

@ -977,7 +977,7 @@ func (s *StateDB) fastDeleteStorage(snaps *snapshot.Tree, addrHash common.Hash,
storageOrigins = make(map[common.Hash][]byte) // the set for tracking the original value of slot
)
stack := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
nodes.AddNode(path, trienode.NewDeleted())
nodes.AddNode(path, trienode.NewDeletedWithPrev(blob))
})
for iter.Next() {
slot := common.CopyBytes(iter.Slot())
@ -1028,7 +1028,7 @@ func (s *StateDB) slowDeleteStorage(addr common.Address, addrHash common.Hash, r
if it.Hash() == (common.Hash{}) {
continue
}
nodes.AddNode(it.Path(), trienode.NewDeleted())
nodes.AddNode(it.Path(), trienode.NewDeletedWithPrev(it.NodeBlob()))
}
if err := it.Error(); err != nil {
return nil, nil, nil, err
@ -1160,7 +1160,7 @@ func (s *StateDB) commit(deleteEmptyObjects bool, noStorageWiping bool) (*stateU
//
// Given that some accounts may be destroyed and then recreated within
// the same block, it's possible that a node set with the same owner
// may already exists. In such cases, these two sets are combined, with
// may already exist. In such cases, these two sets are combined, with
// the later one overwriting the previous one if any nodes are modified
// or deleted in both sets.
//

View file

@ -29,12 +29,12 @@ import (
// insertion order.
type committer struct {
nodes *trienode.NodeSet
tracer *tracer
tracer *prevalueTracer
collectLeaf bool
}
// newCommitter creates a new committer or picks one from the pool.
func newCommitter(nodeset *trienode.NodeSet, tracer *tracer, collectLeaf bool) *committer {
func newCommitter(nodeset *trienode.NodeSet, tracer *prevalueTracer, collectLeaf bool) *committer {
return &committer{
nodes: nodeset,
tracer: tracer,
@ -110,14 +110,16 @@ func (c *committer) commitChildren(path []byte, n *fullNode, parallel bool) {
} else {
wg.Add(1)
go func(index int) {
defer wg.Done()
p := append(path, byte(index))
childSet := trienode.NewNodeSet(c.nodes.Owner)
childCommitter := newCommitter(childSet, c.tracer, c.collectLeaf)
n.Children[index] = childCommitter.commit(p, child, false)
nodesMu.Lock()
c.nodes.MergeSet(childSet)
c.nodes.MergeDisjoint(childSet)
nodesMu.Unlock()
wg.Done()
}(i)
}
}
@ -140,15 +142,15 @@ func (c *committer) store(path []byte, n node) node {
// The node is embedded in its parent, in other words, this node
// will not be stored in the database independently, mark it as
// deleted only if the node was existent in database before.
_, ok := c.tracer.accessList[string(path)]
if ok {
c.nodes.AddNode(path, trienode.NewDeleted())
origin := c.tracer.get(path)
if len(origin) != 0 {
c.nodes.AddNode(path, trienode.NewDeletedWithPrev(origin))
}
return n
}
// Collect the dirty node to nodeset for return.
nhash := common.BytesToHash(hash)
c.nodes.AddNode(path, trienode.New(nhash, nodeToBytes(n)))
c.nodes.AddNode(path, trienode.NewNodeWithPrev(nhash, nodeToBytes(n), c.tracer.get(path)))
// Collect the corresponding leaf node if it's required. We don't check
// full node since it's impossible to store value in fullNode. The key

View file

@ -567,7 +567,12 @@ func VerifyRangeProof(rootHash common.Hash, firstKey []byte, keys [][]byte, valu
}
// Rebuild the trie with the leaf stream, the shape of trie
// should be same with the original one.
tr := &Trie{root: root, reader: newEmptyReader(), tracer: newTracer()}
tr := &Trie{
root: root,
reader: newEmptyReader(),
opTracer: newOpTracer(),
prevalueTracer: newPrevalueTracer(),
}
if empty {
tr.root = nil
}

View file

@ -18,14 +18,13 @@ package trie
import (
"maps"
"github.com/ethereum/go-ethereum/common"
"slices"
)
// tracer tracks the changes of trie nodes. During the trie operations,
// opTracer tracks the changes of trie nodes. During the trie operations,
// some nodes can be deleted from the trie, while these deleted nodes
// won't be captured by trie.Hasher or trie.Committer. Thus, these deleted
// nodes won't be removed from the disk at all. Tracer is an auxiliary tool
// nodes won't be removed from the disk at all. opTracer is an auxiliary tool
// used to track all insert and delete operations of trie and capture all
// deleted nodes eventually.
//
@ -35,38 +34,25 @@ import (
// This tool can track all of them no matter the node is embedded in its
// parent or not, but valueNode is never tracked.
//
// Besides, it's also used for recording the original value of the nodes
// when they are resolved from the disk. The pre-value of the nodes will
// be used to construct trie history in the future.
//
// Note tracer is not thread-safe, callers should be responsible for handling
// Note opTracer is not thread-safe, callers should be responsible for handling
// the concurrency issues by themselves.
type tracer struct {
inserts map[string]struct{}
deletes map[string]struct{}
accessList map[string][]byte
type opTracer struct {
	inserts map[string]struct{} // set of node paths inserted since the last reset/commit
	deletes map[string]struct{} // set of node paths deleted since the last reset/commit
}
// newTracer initializes the tracer for capturing trie changes.
func newTracer() *tracer {
return &tracer{
inserts: make(map[string]struct{}),
deletes: make(map[string]struct{}),
accessList: make(map[string][]byte),
// newOpTracer initializes the tracer for capturing trie changes.
func newOpTracer() *opTracer {
	t := new(opTracer)
	t.inserts = make(map[string]struct{})
	t.deletes = make(map[string]struct{})
	return t
}
// onRead tracks the newly loaded trie node and caches the rlp-encoded
// blob internally. Don't change the value outside of function since
// it's not deep-copied.
func (t *tracer) onRead(path []byte, val []byte) {
t.accessList[string(path)] = val
}
// onInsert tracks the newly inserted trie node. If it's already
// in the deletion set (resurrected node), then just wipe it from
// the deletion set as it's "untouched".
func (t *tracer) onInsert(path []byte) {
func (t *opTracer) onInsert(path []byte) {
if _, present := t.deletes[string(path)]; present {
delete(t.deletes, string(path))
return
@ -77,7 +63,7 @@ func (t *tracer) onInsert(path []byte) {
// onDelete tracks the newly deleted trie node. If it's already
// in the addition set, then just wipe it from the addition set
// as it's untouched.
func (t *tracer) onDelete(path []byte) {
func (t *opTracer) onDelete(path []byte) {
if _, present := t.inserts[string(path)]; present {
delete(t.inserts, string(path))
return
@ -86,37 +72,83 @@ func (t *tracer) onDelete(path []byte) {
}
// reset clears the content tracked by tracer.
func (t *tracer) reset() {
t.inserts = make(map[string]struct{})
t.deletes = make(map[string]struct{})
t.accessList = make(map[string][]byte)
// reset wipes the recorded insertions and deletions, keeping the
// allocated maps for reuse.
func (t *opTracer) reset() {
	clear(t.deletes)
	clear(t.inserts)
}
// copy returns a deep copied tracer instance.
func (t *tracer) copy() *tracer {
accessList := make(map[string][]byte, len(t.accessList))
for path, blob := range t.accessList {
accessList[path] = common.CopyBytes(blob)
}
return &tracer{
inserts: maps.Clone(t.inserts),
deletes: maps.Clone(t.deletes),
accessList: accessList,
// copy returns an independent opTracer holding the same recorded paths.
func (t *opTracer) copy() *opTracer {
	cpy := new(opTracer)
	cpy.inserts = maps.Clone(t.inserts)
	cpy.deletes = maps.Clone(t.deletes)
	return cpy
}
// deletedNodes returns a list of node paths which are deleted from the trie.
func (t *tracer) deletedNodes() []string {
var paths []string
// deletedList returns a list of node paths which are deleted from the trie.
func (t *opTracer) deletedList() [][]byte {
paths := make([][]byte, 0, len(t.deletes))
for path := range t.deletes {
// It's possible a few deleted nodes were embedded
// in their parent before; such deletions have no
// effect (nothing is removed), so filter them out.
_, ok := t.accessList[path]
if !ok {
continue
}
paths = append(paths, path)
paths = append(paths, []byte(path))
}
return paths
}
// prevalueTracer tracks the original values of resolved trie nodes. Cached trie
// node values are expected to be immutable. A zero-size node value is treated as
// non-existent and should not occur in practice.
//
// Note prevalueTracer is not thread-safe; callers are responsible for
// handling concurrency issues by themselves.
type prevalueTracer struct {
	data map[string][]byte // node path -> RLP-encoded blob as resolved from disk
}
// newPrevalueTracer initializes the tracer for capturing resolved trie nodes.
func newPrevalueTracer() *prevalueTracer {
	t := new(prevalueTracer)
	t.data = make(map[string][]byte)
	return t
}
// put caches the RLP-encoded blob of a freshly resolved trie node. The
// blob is retained by reference, so it must not be mutated afterwards.
func (t *prevalueTracer) put(path []byte, blob []byte) {
	t.data[string(path)] = blob
}
// get returns the cached trie node value for the given path. If the node
// has not been tracked, nil will be returned.
func (t *prevalueTracer) get(path []byte) []byte {
	return t.data[string(path)]
}
// hasList reports, for each supplied path, whether the corresponding trie
// node has been tracked by the tracer. The result is positionally aligned
// with the input list.
func (t *prevalueTracer) hasList(list [][]byte) []bool {
	flags := make([]bool, len(list))
	for i, path := range list {
		_, flags[i] = t.data[string(path)]
	}
	return flags
}
// values returns the blobs of all tracked trie nodes, in unspecified order.
func (t *prevalueTracer) values() [][]byte {
	blobs := maps.Values(t.data)
	return slices.Collect(blobs)
}
// reset drops all cached node values in the prevalueTracer, keeping the
// allocated map for reuse.
func (t *prevalueTracer) reset() {
	clear(t.data)
}
// copy returns a copied prevalueTracer instance.
func (t *prevalueTracer) copy() *prevalueTracer {
	// A shallow clone is sufficient, as the cached node values are immutable.
	cpy := new(prevalueTracer)
	cpy.data = maps.Clone(t.data)
	return cpy
}

View file

@ -52,15 +52,15 @@ var (
}
)
func TestTrieTracer(t *testing.T) {
testTrieTracer(t, tiny)
testTrieTracer(t, nonAligned)
testTrieTracer(t, standard)
func TestTrieOpTracer(t *testing.T) {
testTrieOpTracer(t, tiny)
testTrieOpTracer(t, nonAligned)
testTrieOpTracer(t, standard)
}
// Tests if the trie diffs are tracked correctly. Tracer should capture
// all non-leaf dirty nodes, no matter the node is embedded or not.
func testTrieTracer(t *testing.T, vals []struct{ k, v string }) {
func testTrieOpTracer(t *testing.T, vals []struct{ k, v string }) {
db := newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme)
trie := NewEmpty(db)
@ -68,8 +68,9 @@ func testTrieTracer(t *testing.T, vals []struct{ k, v string }) {
for _, val := range vals {
trie.MustUpdate([]byte(val.k), []byte(val.v))
}
insertSet := copySet(trie.tracer.inserts) // copy before commit
deleteSet := copySet(trie.tracer.deletes) // copy before commit
insertSet := copySet(trie.opTracer.inserts) // copy before commit
deleteSet := copySet(trie.opTracer.deletes) // copy before commit
root, nodes := trie.Commit(false)
db.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes))
@ -86,7 +87,7 @@ func testTrieTracer(t *testing.T, vals []struct{ k, v string }) {
for _, val := range vals {
trie.MustDelete([]byte(val.k))
}
insertSet, deleteSet = copySet(trie.tracer.inserts), copySet(trie.tracer.deletes)
insertSet, deleteSet = copySet(trie.opTracer.inserts), copySet(trie.opTracer.deletes)
if !compareSet(insertSet, nil) {
t.Fatal("Unexpected insertion set")
}
@ -97,13 +98,13 @@ func testTrieTracer(t *testing.T, vals []struct{ k, v string }) {
// Test that after inserting a new batch of nodes and deleting them immediately,
// the trie tracer should be cleared normally as no operation happened.
func TestTrieTracerNoop(t *testing.T) {
testTrieTracerNoop(t, tiny)
testTrieTracerNoop(t, nonAligned)
testTrieTracerNoop(t, standard)
func TestTrieOpTracerNoop(t *testing.T) {
testTrieOpTracerNoop(t, tiny)
testTrieOpTracerNoop(t, nonAligned)
testTrieOpTracerNoop(t, standard)
}
func testTrieTracerNoop(t *testing.T, vals []struct{ k, v string }) {
func testTrieOpTracerNoop(t *testing.T, vals []struct{ k, v string }) {
db := newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme)
trie := NewEmpty(db)
for _, val := range vals {
@ -112,22 +113,22 @@ func testTrieTracerNoop(t *testing.T, vals []struct{ k, v string }) {
for _, val := range vals {
trie.MustDelete([]byte(val.k))
}
if len(trie.tracer.inserts) != 0 {
if len(trie.opTracer.inserts) != 0 {
t.Fatal("Unexpected insertion set")
}
if len(trie.tracer.deletes) != 0 {
if len(trie.opTracer.deletes) != 0 {
t.Fatal("Unexpected deletion set")
}
}
// Tests if the accessList is correctly tracked.
func TestAccessList(t *testing.T) {
testAccessList(t, tiny)
testAccessList(t, nonAligned)
testAccessList(t, standard)
// Tests if the original value of trie nodes are correctly tracked.
func TestPrevalueTracer(t *testing.T) {
testPrevalueTracer(t, tiny)
testPrevalueTracer(t, nonAligned)
testPrevalueTracer(t, standard)
}
func testAccessList(t *testing.T, vals []struct{ k, v string }) {
func testPrevalueTracer(t *testing.T, vals []struct{ k, v string }) {
var (
db = newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme)
trie = NewEmpty(db)
@ -210,7 +211,7 @@ func testAccessList(t *testing.T, vals []struct{ k, v string }) {
}
// Tests origin values won't be tracked in Iterator or Prover
func TestAccessListLeak(t *testing.T) {
func TestPrevalueTracerLeak(t *testing.T) {
var (
db = newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme)
trie = NewEmpty(db)
@ -249,9 +250,9 @@ func TestAccessListLeak(t *testing.T) {
}
for _, c := range cases {
trie, _ = New(TrieID(root), db)
n1 := len(trie.tracer.accessList)
n1 := len(trie.prevalueTracer.data)
c.op(trie)
n2 := len(trie.tracer.accessList)
n2 := len(trie.prevalueTracer.data)
if n1 != n2 {
t.Fatalf("AccessList is leaked, prev %d after %d", n1, n2)

View file

@ -55,8 +55,9 @@ type Trie struct {
// reader is the handler trie can retrieve nodes from.
reader *trieReader
// tracer is the tool to track the trie changes.
tracer *tracer
// Various tracers for capturing the modifications to trie
opTracer *opTracer
prevalueTracer *prevalueTracer
}
// newFlag returns the cache flag value for a newly created node.
@ -67,13 +68,14 @@ func (t *Trie) newFlag() nodeFlag {
// Copy returns a copy of Trie.
func (t *Trie) Copy() *Trie {
return &Trie{
root: copyNode(t.root),
owner: t.owner,
committed: t.committed,
unhashed: t.unhashed,
uncommitted: t.uncommitted,
reader: t.reader,
tracer: t.tracer.copy(),
root: copyNode(t.root),
owner: t.owner,
committed: t.committed,
unhashed: t.unhashed,
uncommitted: t.uncommitted,
reader: t.reader,
opTracer: t.opTracer.copy(),
prevalueTracer: t.prevalueTracer.copy(),
}
}
@ -89,9 +91,10 @@ func New(id *ID, db database.NodeDatabase) (*Trie, error) {
return nil, err
}
trie := &Trie{
owner: id.Owner,
reader: reader,
tracer: newTracer(),
owner: id.Owner,
reader: reader,
opTracer: newOpTracer(),
prevalueTracer: newPrevalueTracer(),
}
if id.Root != (common.Hash{}) && id.Root != types.EmptyRootHash {
rootnode, err := trie.resolveAndTrack(id.Root[:], nil)
@ -361,7 +364,7 @@ func (t *Trie) insert(n node, prefix, key []byte, value node) (bool, node, error
// New branch node is created as a child of the original short node.
// Track the newly inserted node in the tracer. The node identifier
// passed is the path from the root node.
t.tracer.onInsert(append(prefix, key[:matchlen]...))
t.opTracer.onInsert(append(prefix, key[:matchlen]...))
// Replace it with a short node leading up to the branch.
return true, &shortNode{key[:matchlen], branch, t.newFlag()}, nil
@ -379,7 +382,7 @@ func (t *Trie) insert(n node, prefix, key []byte, value node) (bool, node, error
// New short node is created and track it in the tracer. The node identifier
// passed is the path from the root node. Note the valueNode won't be tracked
// since it's always embedded in its parent.
t.tracer.onInsert(prefix)
t.opTracer.onInsert(prefix)
return true, &shortNode{key, value, t.newFlag()}, nil
@ -444,7 +447,7 @@ func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) {
// The matched short node is deleted entirely and track
// it in the deletion set. The same the valueNode doesn't
// need to be tracked at all since it's always embedded.
t.tracer.onDelete(prefix)
t.opTracer.onDelete(prefix)
return true, nil, nil // remove n entirely for whole matches
}
@ -460,7 +463,7 @@ func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) {
case *shortNode:
// The child shortNode is merged into its parent, track
// is deleted as well.
t.tracer.onDelete(append(prefix, n.Key...))
t.opTracer.onDelete(append(prefix, n.Key...))
// Deleting from the subtrie reduced it to another
// short node. Merge the nodes to avoid creating a
@ -525,7 +528,7 @@ func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) {
// Replace the entire full node with the short node.
// Mark the original short node as deleted since the
// value is embedded into the parent now.
t.tracer.onDelete(append(prefix, byte(pos)))
t.opTracer.onDelete(append(prefix, byte(pos)))
k := append([]byte{byte(pos)}, cnode.Key...)
return true, &shortNode{k, cnode.Val, t.newFlag()}, nil
@ -616,13 +619,31 @@ func (t *Trie) resolveAndTrack(n hashNode, prefix []byte) (node, error) {
if err != nil {
return nil, err
}
t.tracer.onRead(prefix, blob)
t.prevalueTracer.put(prefix, blob)
// The returned node blob won't be changed afterward. No need to
// deep-copy the slice.
return decodeNodeUnsafe(n, blob)
}
// deletedNodes returns the paths of nodes removed from the trie. It's
// possible a few deleted nodes were embedded in their parent before; such
// deletions have no effect (nothing is removed from disk), so they are
// filtered out by checking whether an original value was ever resolved.
func (t *Trie) deletedNodes() [][]byte {
	list := t.opTracer.deletedList()
	flags := t.prevalueTracer.hasList(list)

	// Compact in place, reusing the backing array of the list.
	kept := list[:0]
	for i, path := range list {
		if flags[i] {
			kept = append(kept, path)
		}
	}
	return kept
}
// Hash returns the root hash of the trie. It does not write to the
// database and can be used even if the trie doesn't have one.
func (t *Trie) Hash() common.Hash {
@ -644,13 +665,13 @@ func (t *Trie) Commit(collectLeaf bool) (common.Hash, *trienode.NodeSet) {
// (b) The trie was non-empty and all nodes are dropped => return
// the node set includes all deleted nodes
if t.root == nil {
paths := t.tracer.deletedNodes()
paths := t.deletedNodes()
if len(paths) == 0 {
return types.EmptyRootHash, nil // case (a)
}
nodes := trienode.NewNodeSet(t.owner)
for _, path := range paths {
nodes.AddNode([]byte(path), trienode.NewDeleted())
nodes.AddNode(path, trienode.NewDeletedWithPrev(t.prevalueTracer.get(path)))
}
return types.EmptyRootHash, nodes // case (b)
}
@ -667,11 +688,11 @@ func (t *Trie) Commit(collectLeaf bool) (common.Hash, *trienode.NodeSet) {
return rootHash, nil
}
nodes := trienode.NewNodeSet(t.owner)
for _, path := range t.tracer.deletedNodes() {
nodes.AddNode([]byte(path), trienode.NewDeleted())
for _, path := range t.deletedNodes() {
nodes.AddNode(path, trienode.NewDeletedWithPrev(t.prevalueTracer.get(path)))
}
// If the number of changes is below 100, we let one thread handle it
t.root = newCommitter(nodes, t.tracer, collectLeaf).Commit(t.root, t.uncommitted > 100)
t.root = newCommitter(nodes, t.prevalueTracer, collectLeaf).Commit(t.root, t.uncommitted > 100)
t.uncommitted = 0
return rootHash, nodes
}
@ -692,12 +713,13 @@ func (t *Trie) hashRoot() []byte {
// Witness returns a set containing all trie nodes that have been accessed.
func (t *Trie) Witness() map[string]struct{} {
if len(t.tracer.accessList) == 0 {
values := t.prevalueTracer.values()
if len(values) == 0 {
return nil
}
witness := make(map[string]struct{}, len(t.tracer.accessList))
for _, node := range t.tracer.accessList {
witness[string(node)] = struct{}{}
witness := make(map[string]struct{}, len(values))
for _, val := range values {
witness[string(val)] = struct{}{}
}
return witness
}
@ -708,6 +730,7 @@ func (t *Trie) Reset() {
t.owner = common.Hash{}
t.unhashed = 0
t.uncommitted = 0
t.tracer.reset()
t.opTracer.reset()
t.prevalueTracer.reset()
t.committed = false
}

View file

@ -449,35 +449,35 @@ func verifyAccessList(old *Trie, new *Trie, set *trienode.NodeSet) error {
if !ok || n.IsDeleted() {
return errors.New("expect new node")
}
//if len(n.Prev) > 0 {
// return errors.New("unexpected origin value")
//}
if len(set.Origins[path]) > 0 {
return errors.New("unexpected origin value")
}
}
// Check deletion set
for path := range deletes {
for path, blob := range deletes {
n, ok := set.Nodes[path]
if !ok || !n.IsDeleted() {
return errors.New("expect deleted node")
}
//if len(n.Prev) == 0 {
// return errors.New("expect origin value")
//}
//if !bytes.Equal(n.Prev, blob) {
// return errors.New("invalid origin value")
//}
if len(set.Origins[path]) == 0 {
return errors.New("expect origin value")
}
if !bytes.Equal(set.Origins[path], blob) {
return errors.New("invalid origin value")
}
}
// Check update set
for path := range updates {
for path, blob := range updates {
n, ok := set.Nodes[path]
if !ok || n.IsDeleted() {
return errors.New("expect updated node")
}
//if len(n.Prev) == 0 {
// return errors.New("expect origin value")
//}
//if !bytes.Equal(n.Prev, blob) {
// return errors.New("invalid origin value")
//}
if len(set.Origins[path]) == 0 {
return errors.New("expect origin value")
}
if !bytes.Equal(set.Origins[path], blob) {
return errors.New("invalid origin value")
}
}
return nil
}
@ -595,18 +595,18 @@ func runRandTest(rt randTest) error {
deleteExp[path] = struct{}{}
}
}
if len(insertExp) != len(tr.tracer.inserts) {
if len(insertExp) != len(tr.opTracer.inserts) {
rt[i].err = errors.New("insert set mismatch")
}
if len(deleteExp) != len(tr.tracer.deletes) {
if len(deleteExp) != len(tr.opTracer.deletes) {
rt[i].err = errors.New("delete set mismatch")
}
for insert := range tr.tracer.inserts {
for insert := range tr.opTracer.inserts {
if _, present := insertExp[insert]; !present {
rt[i].err = errors.New("missing inserted node")
}
}
for del := range tr.tracer.deletes {
for del := range tr.opTracer.deletes {
if _, present := deleteExp[del]; !present {
rt[i].err = errors.New("missing deleted node")
}

View file

@ -51,6 +51,35 @@ func New(hash common.Hash, blob []byte) *Node {
// NewDeleted constructs a node which is deleted.
func NewDeleted() *Node { return New(common.Hash{}, nil) }
// NodeWithPrev is a wrapper over Node that additionally tracks the original
// value of the node before the modification.
type NodeWithPrev struct {
	*Node
	Prev []byte // original node blob; nil means the node was not existent
}
// NewNodeWithPrev constructs a node with the additional original value.
func NewNodeWithPrev(hash common.Hash, blob []byte, prev []byte) *NodeWithPrev {
	inner := &Node{Hash: hash, Blob: blob}
	return &NodeWithPrev{Node: inner, Prev: prev}
}
// NewDeletedWithPrev constructs a deleted node (zero hash, nil blob) that
// carries the original value it replaces.
func NewDeletedWithPrev(prev []byte) *NodeWithPrev {
	deleted := &Node{Hash: common.Hash{}, Blob: nil}
	return &NodeWithPrev{Node: deleted, Prev: prev}
}
// leaf represents a trie leaf node
type leaf struct {
Blob []byte // raw blob of leaf
@ -63,6 +92,8 @@ type NodeSet struct {
Owner common.Hash
Leaves []*leaf
Nodes map[string]*Node
Origins map[string][]byte
updates int // the count of updated and inserted nodes
deletes int // the count of deleted nodes
}
@ -71,8 +102,9 @@ type NodeSet struct {
// the owning account address hash for storage tries.
func NewNodeSet(owner common.Hash) *NodeSet {
return &NodeSet{
Owner: owner,
Nodes: make(map[string]*Node),
Owner: owner,
Nodes: make(map[string]*Node),
Origins: make(map[string][]byte),
}
}
@ -91,22 +123,25 @@ func (set *NodeSet) ForEachWithOrder(callback func(path string, n *Node)) {
}
// AddNode adds the provided node into set.
func (set *NodeSet) AddNode(path []byte, n *Node) {
func (set *NodeSet) AddNode(path []byte, n *NodeWithPrev) {
if n.IsDeleted() {
set.deletes += 1
} else {
set.updates += 1
}
set.Nodes[string(path)] = n
key := string(path)
set.Nodes[key] = n.Node
set.Origins[key] = n.Prev
}
// MergeSet merges this 'set' with 'other'. It assumes that the sets are disjoint,
// MergeDisjoint merges this 'set' with 'other'. It assumes that the sets are disjoint,
// and thus does not deduplicate data (count deletes, dedup leaves etc).
func (set *NodeSet) MergeSet(other *NodeSet) error {
func (set *NodeSet) MergeDisjoint(other *NodeSet) error {
if set.Owner != other.Owner {
return fmt.Errorf("nodesets belong to different owner are not mergeable %x-%x", set.Owner, other.Owner)
}
maps.Copy(set.Nodes, other.Nodes)
maps.Copy(set.Origins, other.Origins)
set.deletes += other.deletes
set.updates += other.updates
@ -117,12 +152,13 @@ func (set *NodeSet) MergeSet(other *NodeSet) error {
return nil
}
// Merge adds a set of nodes into the set.
func (set *NodeSet) Merge(owner common.Hash, nodes map[string]*Node) error {
if set.Owner != owner {
return fmt.Errorf("nodesets belong to different owner are not mergeable %x-%x", set.Owner, owner)
// Merge adds a set of nodes to the current set. It assumes the sets may overlap,
// so deduplication is performed.
func (set *NodeSet) Merge(other *NodeSet) error {
if set.Owner != other.Owner {
return fmt.Errorf("nodesets belong to different owner are not mergeable %x-%x", set.Owner, other.Owner)
}
for path, node := range nodes {
for path, node := range other.Nodes {
prev, ok := set.Nodes[path]
if ok {
// overwrite happens, revoke the counter
@ -137,8 +173,17 @@ func (set *NodeSet) Merge(owner common.Hash, nodes map[string]*Node) error {
} else {
set.updates += 1
}
set.Nodes[path] = node
set.Nodes[path] = node // overwrite the node with new value
// Add the original value only if it was previously non-existent.
// If multiple mutations are made to the same node, the first one
// is considered the true original value.
if _, exist := set.Origins[path]; !exist {
set.Origins[path] = other.Origins[path]
}
}
// TODO leaves are not aggregated, as they are not used in storage tries.
// TODO(rjl493456442) deprecate the leaves along with the legacy hash mode.
return nil
}
@ -169,11 +214,16 @@ func (set *NodeSet) Summary() string {
for path, n := range set.Nodes {
// Deletion
if n.IsDeleted() {
fmt.Fprintf(out, " [-]: %x\n", path)
fmt.Fprintf(out, " [-]: %x prev: %x\n", path, set.Origins[path])
continue
}
// Insertion or update
fmt.Fprintf(out, " [+/*]: %x -> %v \n", path, n.Hash)
// Insertion
if len(set.Origins[path]) == 0 {
fmt.Fprintf(out, " [+]: %x -> %v\n", path, n.Hash)
continue
}
// Update
fmt.Fprintf(out, " [*]: %x -> %v prev: %x\n", path, n.Hash, set.Origins[path])
}
for _, n := range set.Leaves {
fmt.Fprintf(out, "[leaf]: %v\n", n)
@ -203,7 +253,7 @@ func NewWithNodeSet(set *NodeSet) *MergedNodeSet {
func (set *MergedNodeSet) Merge(other *NodeSet) error {
subset, present := set.Sets[other.Owner]
if present {
return subset.Merge(other.Owner, other.Nodes)
return subset.Merge(other)
}
set.Sets[other.Owner] = other
return nil

View file

@ -17,13 +17,100 @@
package trienode
import (
"bytes"
"crypto/rand"
"maps"
"reflect"
"slices"
"testing"
"github.com/davecgh/go-spew/spew"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/internal/testrand"
)
// makeTestSet constructs a NodeSet with random content: roughly 3n/4 updated
// nodes and n/4 deleted nodes under random paths, plus one entry per supplied
// path (every third path deleted, the rest updated). The supplied paths allow
// two sets to deliberately overlap.
func makeTestSet(owner common.Hash, n int, paths [][]byte) *NodeSet {
	set := NewNodeSet(owner)
	// Random updated nodes with a random original value.
	for i := 0; i < n*3/4; i++ {
		path := testrand.Bytes(10)
		blob := testrand.Bytes(100)
		set.AddNode(path, NewNodeWithPrev(crypto.Keccak256Hash(blob), blob, testrand.Bytes(100)))
	}
	// Random deleted nodes with a random original value.
	for i := 0; i < n/4; i++ {
		path := testrand.Bytes(10)
		set.AddNode(path, NewDeletedWithPrev(testrand.Bytes(100)))
	}
	// Entries at the caller-chosen paths, mixing deletions and updates.
	for i := 0; i < len(paths); i++ {
		if i%3 == 0 {
			set.AddNode(paths[i], NewDeletedWithPrev(testrand.Bytes(100)))
		} else {
			blob := testrand.Bytes(100)
			set.AddNode(paths[i], NewNodeWithPrev(crypto.Keccak256Hash(blob), blob, testrand.Bytes(100)))
		}
	}
	return set
}
// copyNodeSet returns an independent copy of the given set (including the
// unexported counters), so the original content can be compared after the
// set is mutated. Node and origin values are shared, not deep-copied.
func copyNodeSet(set *NodeSet) *NodeSet {
	cpy := &NodeSet{
		Owner:   set.Owner,
		Leaves:  slices.Clone(set.Leaves),
		updates: set.updates,
		deletes: set.deletes,
		Nodes:   maps.Clone(set.Nodes),
		Origins: maps.Clone(set.Origins),
	}
	return cpy
}
// TestNodeSetMerge checks that merging two overlapping NodeSets keeps the
// first-seen origin value and the last-written node for every entry.
func TestNodeSetMerge(t *testing.T) {
	// Two shared paths guarantee the sets overlap.
	var shared [][]byte
	for i := 0; i < 2; i++ {
		shared = append(shared, testrand.Bytes(10))
	}
	owner := testrand.Hash()
	setA := makeTestSet(owner, 20, shared)
	cpyA := copyNodeSet(setA) // snapshot of A before the merge
	setB := makeTestSet(owner, 20, shared)

	// The merge error was previously ignored; a failed merge would make
	// the assertions below meaningless, so check it explicitly.
	if err := setA.Merge(setB); err != nil {
		t.Fatalf("Failed to merge node sets: %v", err)
	}
	for path, node := range setA.Nodes {
		nA, inA := cpyA.Nodes[path]
		nB, inB := setB.Nodes[path]
		switch {
		case inA && inB:
			// Overlapping entry: origin stays from A, node comes from B.
			origin := setA.Origins[path]
			if !bytes.Equal(origin, cpyA.Origins[path]) {
				t.Errorf("Unexpected origin, path %v: want: %v, got: %v", []byte(path), cpyA.Origins[path], origin)
			}
			if !reflect.DeepEqual(node, nB) {
				t.Errorf("Unexpected node, path %v: want: %v, got: %v", []byte(path), spew.Sdump(nB), spew.Sdump(node))
			}
		case !inA && inB:
			// B-only entry: both origin and node come from B.
			origin := setA.Origins[path]
			if !bytes.Equal(origin, setB.Origins[path]) {
				t.Errorf("Unexpected origin, path %v: want: %v, got: %v", []byte(path), setB.Origins[path], origin)
			}
			if !reflect.DeepEqual(node, nB) {
				t.Errorf("Unexpected node, path %v: want: %v, got: %v", []byte(path), spew.Sdump(nB), spew.Sdump(node))
			}
		case inA && !inB:
			// A-only entry: both origin and node are untouched.
			origin := setA.Origins[path]
			if !bytes.Equal(origin, cpyA.Origins[path]) {
				t.Errorf("Unexpected origin, path %v: want: %v, got: %v", []byte(path), cpyA.Origins[path], origin)
			}
			if !reflect.DeepEqual(node, nA) {
				t.Errorf("Unexpected node, path %v: want: %v, got: %v", []byte(path), spew.Sdump(nA), spew.Sdump(node))
			}
		default:
			t.Errorf("Unexpected node, %v", []byte(path))
		}
	}
}
func BenchmarkMerge(b *testing.B) {
b.Run("1K", func(b *testing.B) {
benchmarkMerge(b, 1000)
@ -42,7 +129,7 @@ func benchmarkMerge(b *testing.B, count int) {
blob := make([]byte, 32)
rand.Read(blob)
hash := crypto.Keccak256Hash(blob)
s.AddNode(path, New(hash, blob))
s.AddNode(path, NewNodeWithPrev(hash, blob, nil))
}
for i := 0; i < count; i++ {
// Random path of 4 nibbles
@ -53,9 +140,9 @@ func benchmarkMerge(b *testing.B, count int) {
for i := 0; i < b.N; i++ {
// Store set x into a backup
z := NewNodeSet(common.Hash{})
z.Merge(common.Hash{}, x.Nodes)
z.Merge(x)
// Merge y into x
x.Merge(common.Hash{}, y.Nodes)
x.Merge(y)
x = z
}
}

View file

@ -42,6 +42,7 @@ type VerkleTrie struct {
root verkle.VerkleNode
cache *utils.PointCache
reader *trieReader
tracer *prevalueTracer
}
// NewVerkleTrie constructs a verkle tree based on the specified root hash.
@ -50,27 +51,25 @@ func NewVerkleTrie(root common.Hash, db database.NodeDatabase, cache *utils.Poin
if err != nil {
return nil, err
}
// Parse the root verkle node if it's not empty.
node := verkle.New()
if root != types.EmptyVerkleHash && root != types.EmptyRootHash {
blob, err := reader.node(nil, common.Hash{})
if err != nil {
return nil, err
}
node, err = verkle.ParseNode(blob, 0)
if err != nil {
return nil, err
}
}
return &VerkleTrie{
root: node,
t := &VerkleTrie{
root: verkle.New(),
cache: cache,
reader: reader,
}, nil
}
func (t *VerkleTrie) FlatdbNodeResolver(path []byte) ([]byte, error) {
return t.reader.node(path, common.Hash{})
tracer: newPrevalueTracer(),
}
// Parse the root verkle node if it's not empty.
if root != types.EmptyVerkleHash && root != types.EmptyRootHash {
blob, err := t.nodeResolver(nil)
if err != nil {
return nil, err
}
node, err := verkle.ParseNode(blob, 0)
if err != nil {
return nil, err
}
t.root = node
}
return t, nil
}
// GetKey returns the sha3 preimage of a hashed key that was previously used
@ -268,7 +267,7 @@ func (t *VerkleTrie) Commit(_ bool) (common.Hash, *trienode.NodeSet) {
nodeset := trienode.NewNodeSet(common.Hash{})
for _, node := range nodes {
// Hash parameter is not used in pathdb
nodeset.AddNode(node.Path, trienode.New(common.Hash{}, node.SerializedBytes))
nodeset.AddNode(node.Path, trienode.NewNodeWithPrev(common.Hash{}, node.SerializedBytes, t.tracer.get(node.Path)))
}
// Serialize root commitment form
return t.Hash(), nodeset
@ -301,6 +300,7 @@ func (t *VerkleTrie) Copy() *VerkleTrie {
root: t.root.Copy(),
cache: t.cache,
reader: t.reader,
tracer: t.tracer.copy(),
}
}
@ -317,7 +317,7 @@ func (t *VerkleTrie) Proof(posttrie *VerkleTrie, keys [][]byte) (*verkle.VerkleP
if posttrie != nil {
postroot = posttrie.root
}
proof, _, _, _, err := verkle.MakeVerkleMultiProof(t.root, postroot, keys, t.FlatdbNodeResolver)
proof, _, _, _, err := verkle.MakeVerkleMultiProof(t.root, postroot, keys, t.nodeResolver)
if err != nil {
return nil, nil, err
}
@ -421,7 +421,12 @@ func (t *VerkleTrie) ToDot() string {
}
func (t *VerkleTrie) nodeResolver(path []byte) ([]byte, error) {
return t.reader.node(path, common.Hash{})
blob, err := t.reader.node(path, common.Hash{})
if err != nil {
return nil, err
}
t.tracer.put(path, blob)
return blob, nil
}
// Witness returns a set containing all trie nodes that have been accessed.