From 028ccebd23fd11c061a75793ecdffc9bb4030b91 Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Wed, 1 Apr 2026 14:02:41 +0800 Subject: [PATCH] core/state: export StateUpdate struct --- core/state/database.go | 2 +- core/state/database_history.go | 2 +- core/state/database_iterator.go | 62 ++-- core/state/database_iterator_test.go | 21 +- core/state/database_mpt.go | 33 ++- core/state/database_ubt.go | 23 +- core/state/dump.go | 16 +- core/state/state_object.go | 62 ++-- core/state/state_sizer.go | 59 ++-- core/state/state_sizer_test.go | 4 +- core/state/statedb.go | 55 ++-- core/state/statedb_fuzz_test.go | 15 +- core/state/stateupdate.go | 424 +++++++++++++++------------ 13 files changed, 428 insertions(+), 350 deletions(-) diff --git a/core/state/database.go b/core/state/database.go index b9ccff658f..3b1e627f28 100644 --- a/core/state/database.go +++ b/core/state/database.go @@ -70,7 +70,7 @@ type Database interface { // Commit flushes all pending writes and finalizes the state transition, // committing the changes to the underlying storage. It returns an error // if the commit fails. - Commit(update *stateUpdate) error + Commit(update *StateUpdate) error } // Trie is a Ethereum Merkle Patricia trie. diff --git a/core/state/database_history.go b/core/state/database_history.go index d1ed2fe194..fbf4ab5f9c 100644 --- a/core/state/database_history.go +++ b/core/state/database_history.go @@ -297,7 +297,7 @@ func (db *HistoricDB) TrieDB() *triedb.Database { // Commit flushes all pending writes and finalizes the state transition, // committing the changes to the underlying storage. It returns an error // if the commit fails. 
-func (db *HistoricDB) Commit(update *stateUpdate) error { +func (db *HistoricDB) Commit(update *StateUpdate) error { return errors.New("not implemented") } diff --git a/core/state/database_iterator.go b/core/state/database_iterator.go index 8fad66a1e8..d4aaeb89ad 100644 --- a/core/state/database_iterator.go +++ b/core/state/database_iterator.go @@ -23,6 +23,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/state/snapshot" "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" "github.com/ethereum/go-ethereum/triedb" ) @@ -57,9 +58,9 @@ type AccountIterator interface { // An error will be returned if the preimage is not available. Address() (common.Address, error) - // Account returns the RLP encoded account the iterator is currently at. + // Account returns the account the iterator is currently at. // An error will be retained if the iterator becomes invalid. - Account() []byte + Account() *types.StateAccount } // StorageIterator is an iterator to step over the specific storage in the @@ -73,7 +74,7 @@ type StorageIterator interface { // Slot returns the storage slot the iterator is currently at. An error will // be retained if the iterator becomes invalid. - Slot() []byte + Slot() common.Hash } // Iteratee wraps the NewIterator methods for traversing the accounts and @@ -131,10 +132,7 @@ func (ai *flatAccountIterator) Next() bool { // Error returns any failure that occurred during iteration, which might have // caused a premature iteration exit. func (ai *flatAccountIterator) Error() error { - if ai.err != nil { - return ai.err - } - return ai.it.Error() + return errors.Join(ai.err, ai.it.Error()) } // Hash returns the hash of the account or storage slot the iterator is @@ -165,8 +163,8 @@ func (ai *flatAccountIterator) Address() (common.Address, error) { // Account returns the account data the iterator is currently at. 
The account // data is encoded as slim format from the underlying iterator, the conversion // is required. -func (ai *flatAccountIterator) Account() []byte { - data, err := types.FullAccountRLP(ai.it.Account()) +func (ai *flatAccountIterator) Account() *types.StateAccount { + data, err := types.FullAccount(ai.it.Account()) if err != nil { ai.err = err return nil @@ -176,6 +174,7 @@ func (ai *flatAccountIterator) Account() []byte { // flatStorageIterator is a wrapper around the underlying flat state iterator. type flatStorageIterator struct { + err error it snapshot.StorageIterator preimage PreimageReader } @@ -196,7 +195,7 @@ func (si *flatStorageIterator) Next() bool { // Error returns any failure that occurred during iteration, which might have // caused a premature iteration exit. func (si *flatStorageIterator) Error() error { - return si.it.Error() + return errors.Join(si.err, si.it.Error()) } // Hash returns the hash of the account or storage slot the iterator is @@ -225,14 +224,24 @@ func (si *flatStorageIterator) Key() (common.Hash, error) { } // Slot returns the storage slot data the iterator is currently at. -func (si *flatStorageIterator) Slot() []byte { - return si.it.Slot() +func (si *flatStorageIterator) Slot() common.Hash { + // Perform the rlp-decode as the slot value is RLP-encoded + blob := si.it.Slot() + _, content, _, err := rlp.Split(blob) + if err != nil { + si.err = err + return common.Hash{} + } + var value common.Hash + value.SetBytes(content) + return value } // merkleIterator implements the Iterator interface, providing functions to traverse // the accounts or storages with the manner of Merkle-Patricia-Trie. type merkleIterator struct { tr Trie + err error it *trie.Iterator account bool } @@ -260,7 +269,7 @@ func (ti *merkleIterator) Next() bool { // Error returns any failure that occurred during iteration, which might have // caused a premature iteration exit. 
func (ti *merkleIterator) Error() error { - return ti.it.Err + return errors.Join(ti.err, ti.it.Err) } // Hash returns the hash of the account or storage slot the iterator is @@ -287,11 +296,16 @@ func (ti *merkleIterator) Address() (common.Address, error) { } // Account returns the account data the iterator is currently at. -func (ti *merkleIterator) Account() []byte { +func (ti *merkleIterator) Account() *types.StateAccount { if !ti.account { return nil } - return ti.it.Value + var account types.StateAccount + if err := rlp.DecodeBytes(ti.it.Value, &account); err != nil { + ti.err = err + return nil + } + return &account } // Key returns the raw storage slot key the iterator is currently at. @@ -308,11 +322,19 @@ func (ti *merkleIterator) Key() (common.Hash, error) { } // Slot returns the storage slot the iterator is currently at. -func (ti *merkleIterator) Slot() []byte { +func (ti *merkleIterator) Slot() common.Hash { if ti.account { - return nil + return common.Hash{} } - return ti.it.Value + // Perform the rlp-decode as the slot value is RLP-encoded + _, content, _, err := rlp.Split(ti.it.Value) + if err != nil { + ti.err = err + return common.Hash{} + } + var value common.Hash + value.SetBytes(content) + return value } // stateIteratee implements Iteratee interface, providing the state traversal @@ -430,6 +452,6 @@ func (e exhaustedIterator) Key() (common.Hash, error) { return common.Hash{}, nil } -func (e exhaustedIterator) Slot() []byte { - return nil +func (e exhaustedIterator) Slot() common.Hash { + return common.Hash{} } diff --git a/core/state/database_iterator_test.go b/core/state/database_iterator_test.go index 87819e5526..8313f86403 100644 --- a/core/state/database_iterator_test.go +++ b/core/state/database_iterator_test.go @@ -24,7 +24,6 @@ import ( "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" - "github.com/ethereum/go-ethereum/rlp" 
"github.com/ethereum/go-ethereum/trie" ) @@ -45,7 +44,7 @@ func TestExhaustedIterator(t *testing.T) { if key, err := it.Key(); key != (common.Hash{}) || err != nil { t.Fatalf("Key() = %x, %v; want zero, nil", key, err) } - if slot := it.Slot(); slot != nil { + if slot := it.Slot(); slot != (common.Hash{}) { t.Fatalf("Slot() = %x, want nil", slot) } it.Release() @@ -95,20 +94,16 @@ func testAccountIterator(t *testing.T, scheme string) { hashes = append(hashes, hash) // Decode and verify account data. - blob := acctIt.Account() - if blob == nil { + got := acctIt.Account() + if got == nil { t.Fatalf("(%s) nil account at %x", scheme, hash) } - var decoded types.StateAccount - if err := rlp.DecodeBytes(blob, &decoded); err != nil { - t.Fatalf("(%s) bad RLP at %x: %v", scheme, hash, err) - } acc := addrByHash[hash] - if decoded.Nonce != acc.nonce { - t.Fatalf("(%s) nonce %x: got %d, want %d", scheme, hash, decoded.Nonce, acc.nonce) + if got.Nonce != acc.nonce { + t.Fatalf("(%s) nonce %x: got %d, want %d", scheme, hash, got.Nonce, acc.nonce) } - if decoded.Balance.Cmp(acc.balance) != 0 { - t.Fatalf("(%s) balance %x: got %v, want %v", scheme, hash, decoded.Balance, acc.balance) + if got.Balance.Cmp(acc.balance) != 0 { + t.Fatalf("(%s) balance %x: got %v, want %v", scheme, hash, got.Balance, acc.balance) } // Verify address preimage resolution. addr, err := acctIt.Address() @@ -183,7 +178,7 @@ func testStorageIterator(t *testing.T, scheme string) { t.Fatalf("(%s) storage hashes not ascending for %x", scheme, acc.address) } prevHash = hash - if storageIt.Slot() == nil { + if storageIt.Slot() == (common.Hash{}) { t.Fatalf("(%s) nil slot at %x", scheme, hash) } // Check key preimage resolution on first slot. 
diff --git a/core/state/database_mpt.go b/core/state/database_mpt.go index bf9c7c9fff..2080cd5450 100644 --- a/core/state/database_mpt.go +++ b/core/state/database_mpt.go @@ -140,35 +140,44 @@ func (db *MPTDatabase) TrieDB() *triedb.Database { // Commit flushes all pending writes and finalizes the state transition, // committing the changes to the underlying storage. It returns an error // if the commit fails. -func (db *MPTDatabase) Commit(update *stateUpdate) error { +func (db *MPTDatabase) Commit(update *StateUpdate) error { // Short circuit if nothing to commit - if update.empty() { + if update.Empty() { return nil } // Commit dirty contract code if any exists - if len(update.codes) > 0 { - batch := db.codedb.NewBatchWithSize(len(update.codes)) - for _, code := range update.codes { - batch.Put(code.hash, code.blob) + if len(update.Codes) > 0 { + batch := db.codedb.NewBatchWithSize(len(update.Codes)) + for _, code := range update.Codes { + batch.Put(code.Hash, code.Blob) } if err := batch.Commit(); err != nil { return err } } + // Encode the state mutations in the MPT format + accounts, accountOrigin, storages, storageOrigin := update.EncodeMPTState() + // If snapshotting is enabled, update the snapshot tree with this new version - if db.snap != nil && db.snap.Snapshot(update.originRoot) != nil { - if err := db.snap.Update(update.root, update.originRoot, update.accounts, update.storages); err != nil { - log.Warn("Failed to update snapshot tree", "from", update.originRoot, "to", update.root, "err", err) + if db.snap != nil && db.snap.Snapshot(update.OriginRoot) != nil { + if err := db.snap.Update(update.Root, update.OriginRoot, accounts, storages); err != nil { + log.Warn("Failed to update snapshot tree", "from", update.OriginRoot, "to", update.Root, "err", err) } // Keep 128 diff layers in the memory, persistent layer is 129th. 
// - head layer is paired with HEAD state // - head-1 layer is paired with HEAD-1 state // - head-127 layer(bottom-most diff layer) is paired with HEAD-127 state - if err := db.snap.Cap(update.root, TriesInMemory); err != nil { - log.Warn("Failed to cap snapshot tree", "root", update.root, "layers", TriesInMemory, "err", err) + if err := db.snap.Cap(update.Root, TriesInMemory); err != nil { + log.Warn("Failed to cap snapshot tree", "root", update.Root, "layers", TriesInMemory, "err", err) } } - return db.triedb.Update(update.root, update.originRoot, update.blockNumber, update.nodes, update.stateSet()) + return db.triedb.Update(update.Root, update.OriginRoot, update.BlockNumber, update.Nodes, &triedb.StateSet{ + Accounts: accounts, + AccountsOrigin: accountOrigin, + Storages: storages, + StoragesOrigin: storageOrigin, + RawStorageKey: update.StorageKeyType == StorageKeyPlain, + }) } // Iteratee returns a state iteratee associated with the specified state root, diff --git a/core/state/database_ubt.go b/core/state/database_ubt.go index 39aa64508b..1f10297562 100644 --- a/core/state/database_ubt.go +++ b/core/state/database_ubt.go @@ -113,22 +113,31 @@ func (db *UBTDatabase) TrieDB() *triedb.Database { // Commit flushes all pending writes and finalizes the state transition, // committing the changes to the underlying storage. It returns an error // if the commit fails. 
-func (db *UBTDatabase) Commit(update *stateUpdate) error { +func (db *UBTDatabase) Commit(update *StateUpdate) error { // Short circuit if nothing to commit - if update.empty() { + if update.Empty() { return nil } // Commit dirty contract code if any exists - if len(update.codes) > 0 { - batch := db.codedb.NewBatchWithSize(len(update.codes)) - for _, code := range update.codes { - batch.Put(code.hash, code.blob) + if len(update.Codes) > 0 { + batch := db.codedb.NewBatchWithSize(len(update.Codes)) + for _, code := range update.Codes { + batch.Put(code.Hash, code.Blob) } if err := batch.Commit(); err != nil { return err } } - return db.triedb.Update(update.root, update.originRoot, update.blockNumber, update.nodes, update.stateSet()) + // Encode the state mutations in the UBT format + accounts, accountOrigin, storages, storageOrigin := update.EncodeUBTState() + + return db.triedb.Update(update.Root, update.OriginRoot, update.BlockNumber, update.Nodes, &triedb.StateSet{ + Accounts: accounts, + AccountsOrigin: accountOrigin, + Storages: storages, + StoragesOrigin: storageOrigin, + RawStorageKey: update.StorageKeyType == StorageKeyPlain, + }) } // Iteratee returns a state iteratee associated with the specified state root, diff --git a/core/state/dump.go b/core/state/dump.go index 71138143d9..467877b225 100644 --- a/core/state/dump.go +++ b/core/state/dump.go @@ -24,9 +24,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common/hexutil" - "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie/bintrie" ) @@ -144,11 +142,8 @@ func (s *StateDB) DumpToCollector(c DumpCollector, conf *DumpConfig) (nextKey [] defer acctIt.Release() for acctIt.Next() { - var data types.StateAccount - if err := rlp.DecodeBytes(acctIt.Account(), &data); err != nil { - panic(err) - } var ( + data = acctIt.Account() account = DumpAccount{ Balance: 
data.Balance.String(), Nonce: data.Nonce, @@ -168,7 +163,7 @@ func (s *StateDB) DumpToCollector(c DumpCollector, conf *DumpConfig) (nextKey [] address = &addrBytes account.Address = address } - obj := newObject(s, addrBytes, &data) + obj := newObject(s, addrBytes, data) if !conf.SkipCode { account.Code = obj.Code() } @@ -181,16 +176,11 @@ func (s *StateDB) DumpToCollector(c DumpCollector, conf *DumpConfig) (nextKey [] continue } for storageIt.Next() { - _, content, _, err := rlp.Split(storageIt.Slot()) - if err != nil { - log.Error("Failed to decode the value returned by iterator", "error", err) - continue - } key, err := storageIt.Key() if err != nil { continue } - account.Storage[key] = common.Bytes2Hex(content) + account.Storage[key] = storageIt.Slot().String() } storageIt.Release() } diff --git a/core/state/state_object.go b/core/state/state_object.go index a812359368..df54733d63 100644 --- a/core/state/state_object.go +++ b/core/state/state_object.go @@ -27,7 +27,6 @@ import ( "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" "github.com/ethereum/go-ethereum/trie/bintrie" "github.com/ethereum/go-ethereum/trie/transitiontrie" @@ -398,17 +397,8 @@ func (s *stateObject) updateRoot() { } // commitStorage overwrites the clean storage with the storage changes and -// fulfills the storage diffs into the given accountUpdate struct. -func (s *stateObject) commitStorage(op *accountUpdate) { - var ( - encode = func(val common.Hash) []byte { - if val == (common.Hash{}) { - return nil - } - blob, _ := rlp.EncodeToBytes(common.TrimLeftZeroes(val[:])) - return blob - } - ) +// fulfills the storage diffs into the given AccountUpdate struct. 
+func (s *stateObject) commitStorage(op *AccountUpdate) { for key, val := range s.pendingStorage { // Skip the noop storage changes, it might be possible the value // of tracked slot is same in originStorage and pendingStorage @@ -418,20 +408,20 @@ func (s *stateObject) commitStorage(op *accountUpdate) { continue } hash := crypto.Keccak256Hash(key[:]) - if op.storages == nil { - op.storages = make(map[common.Hash][]byte) + if op.Storages == nil { + op.Storages = make(map[common.Hash]common.Hash) } - op.storages[hash] = encode(val) + op.Storages[hash] = val - if op.storagesOriginByKey == nil { - op.storagesOriginByKey = make(map[common.Hash][]byte) + if op.StoragesOriginByKey == nil { + op.StoragesOriginByKey = make(map[common.Hash]common.Hash) } - if op.storagesOriginByHash == nil { - op.storagesOriginByHash = make(map[common.Hash][]byte) + if op.StoragesOriginByHash == nil { + op.StoragesOriginByHash = make(map[common.Hash]common.Hash) } - origin := encode(s.originStorage[key]) - op.storagesOriginByKey[key] = origin - op.storagesOriginByHash[hash] = origin + origin := s.originStorage[key] + op.StoragesOriginByKey[key] = origin + op.StoragesOriginByHash[hash] = origin // Overwrite the clean value of storage slots s.originStorage[key] = val @@ -444,32 +434,32 @@ func (s *stateObject) commitStorage(op *accountUpdate) { // // Note, commit may run concurrently across all the state objects. Do not assume // thread-safe access to the statedb. -func (s *stateObject) commit() (*accountUpdate, *trienode.NodeSet, error) { - // commit the account metadata changes - op := &accountUpdate{ - address: s.address, - data: types.SlimAccountRLP(s.data), - } - if s.origin != nil { - op.origin = types.SlimAccountRLP(*s.origin) +func (s *stateObject) commit() (*AccountUpdate, *trienode.NodeSet, error) { + // commit the account metadata changes, the data must be deep-copied + // to prevent accidental mutations later on (in practice the stateDB + // won't be modified after commit). 
The origin is safe to use directly. + op := &AccountUpdate{ + Address: s.address, + Data: s.data.Copy(), + Origin: s.origin, } // commit the contract code if it's modified if s.dirtyCode { - op.code = &contractCode{ - hash: common.BytesToHash(s.CodeHash()), - blob: s.code, + op.Code = &ContractCode{ + Hash: common.BytesToHash(s.CodeHash()), + Blob: s.code, } s.dirtyCode = false // reset the dirty flag if s.origin == nil { - op.code.originHash = types.EmptyCodeHash + op.Code.OriginHash = types.EmptyCodeHash } else { - op.code.originHash = common.BytesToHash(s.origin.CodeHash) + op.Code.OriginHash = common.BytesToHash(s.origin.CodeHash) } } // Commit storage changes and the associated storage trie s.commitStorage(op) - if len(op.storages) == 0 { + if len(op.Storages) == 0 { // nothing changed, don't bother to commit the trie s.origin = s.data.Copy() return op, nil, nil diff --git a/core/state/state_sizer.go b/core/state/state_sizer.go index 02b73e5575..3293d7e950 100644 --- a/core/state/state_sizer.go +++ b/core/state/state_sizer.go @@ -125,16 +125,17 @@ func (s SizeStats) add(diff SizeStats) SizeStats { } // calSizeStats measures the state size changes of the provided state update. 
-func calSizeStats(update *stateUpdate) (SizeStats, error) { +func calSizeStats(update *StateUpdate) (SizeStats, error) { stats := SizeStats{ - BlockNumber: update.blockNumber, - StateRoot: update.root, + BlockNumber: update.BlockNumber, + StateRoot: update.Root, } + accounts, accountOrigin, storages, storageOrigin := update.EncodeMPTState() // Measure the account changes - for addr, oldValue := range update.accountsOrigin { + for addr, oldValue := range accountOrigin { addrHash := crypto.Keccak256Hash(addr.Bytes()) - newValue, exists := update.accounts[addrHash] + newValue, exists := accounts[addrHash] if !exists { return SizeStats{}, fmt.Errorf("account %x not found", addr) } @@ -156,9 +157,9 @@ func calSizeStats(update *stateUpdate) (SizeStats, error) { } // Measure storage changes - for addr, slots := range update.storagesOrigin { + for addr, slots := range storageOrigin { addrHash := crypto.Keccak256Hash(addr.Bytes()) - subset, exists := update.storages[addrHash] + subset, exists := storages[addrHash] if !exists { return SizeStats{}, fmt.Errorf("storage %x not found", addr) } @@ -167,7 +168,7 @@ func calSizeStats(update *stateUpdate) (SizeStats, error) { exists bool newValue []byte ) - if update.rawStorageKey { + if update.StorageKeyType == StorageKeyPlain { newValue, exists = subset[crypto.Keccak256Hash(key.Bytes())] } else { newValue, exists = subset[key] @@ -194,7 +195,7 @@ func calSizeStats(update *stateUpdate) (SizeStats, error) { } // Measure trienode changes - for owner, subset := range update.nodes.Sets { + for owner, subset := range update.Nodes.Sets { var ( keyPrefix int64 isAccount = owner == (common.Hash{}) @@ -244,13 +245,13 @@ func calSizeStats(update *stateUpdate) (SizeStats, error) { } codeExists := make(map[common.Hash]struct{}) - for _, code := range update.codes { - if _, ok := codeExists[code.hash]; ok || code.duplicate { + for _, code := range update.Codes { + if _, ok := codeExists[code.Hash]; ok || code.Duplicate { continue } 
stats.ContractCodes += 1 - stats.ContractCodeBytes += codeKeySize + int64(len(code.blob)) - codeExists[code.hash] = struct{}{} + stats.ContractCodeBytes += codeKeySize + int64(len(code.Blob)) + codeExists[code.Hash] = struct{}{} } return stats, nil } @@ -267,7 +268,7 @@ type SizeTracker struct { triedb *triedb.Database abort chan struct{} aborted chan struct{} - updateCh chan *stateUpdate + updateCh chan *StateUpdate queryCh chan *stateSizeQuery } @@ -281,7 +282,7 @@ func NewSizeTracker(db ethdb.KeyValueStore, triedb *triedb.Database) (*SizeTrack triedb: triedb, abort: make(chan struct{}), aborted: make(chan struct{}), - updateCh: make(chan *stateUpdate), + updateCh: make(chan *StateUpdate), queryCh: make(chan *stateSizeQuery), } go t.run() @@ -328,9 +329,9 @@ func (t *SizeTracker) run() { for { select { case u := <-t.updateCh: - base, found := stats[u.originRoot] + base, found := stats[u.OriginRoot] if !found { - log.Debug("Ignored the state size without parent", "parent", u.originRoot, "root", u.root, "number", u.blockNumber) + log.Debug("Ignored the state size without parent", "parent", u.OriginRoot, "root", u.Root, "number", u.BlockNumber) continue } diff, err := calSizeStats(u) @@ -338,15 +339,15 @@ func (t *SizeTracker) run() { continue } stat := base.add(diff) - stats[u.root] = stat - last = u.root + stats[u.Root] = stat + last = u.Root // Publish statistics to metric system stat.publish() // Evict the stale statistics - heap.Push(&h, stats[u.root]) - for len(h) > 0 && u.blockNumber-h[0].BlockNumber > statEvictThreshold { + heap.Push(&h, stats[u.Root]) + for len(h) > 0 && u.BlockNumber-h[0].BlockNumber > statEvictThreshold { delete(stats, h[0].StateRoot) heap.Pop(&h) } @@ -402,7 +403,7 @@ wait: } var ( - updates = make(map[common.Hash]*stateUpdate) + updates = make(map[common.Hash]*StateUpdate) children = make(map[common.Hash][]common.Hash) done chan buildResult ) @@ -410,9 +411,9 @@ wait: for { select { case u := <-t.updateCh: - updates[u.root] = u - 
children[u.originRoot] = append(children[u.originRoot], u.root) - log.Debug("Received state update", "root", u.root, "blockNumber", u.blockNumber) + updates[u.Root] = u + children[u.OriginRoot] = append(children[u.OriginRoot], u.Root) + log.Debug("Received state update", "root", u.Root, "blockNumber", u.BlockNumber) case r := <-t.queryCh: r.err = errors.New("state size is not initialized yet") @@ -432,8 +433,8 @@ wait: continue } done = make(chan buildResult) - go t.build(entry.root, entry.blockNumber, done) - log.Info("Measuring persistent state size", "root", root.Hex(), "number", entry.blockNumber) + go t.build(entry.Root, entry.BlockNumber, done) + log.Info("Measuring persistent state size", "root", root.Hex(), "number", entry.BlockNumber) case result := <-done: if result.err != nil { @@ -646,8 +647,8 @@ func (t *SizeTracker) iterateTableParallel(closed chan struct{}, prefix []byte, // Notify is an async method used to send the state update to the size tracker. // It ignores empty updates (where no state changes occurred). // If the channel is full, it drops the update to avoid blocking. 
-func (t *SizeTracker) Notify(update *stateUpdate) { - if update == nil || update.empty() { +func (t *SizeTracker) Notify(update *StateUpdate) { + if update == nil || update.Empty() { return } select { diff --git a/core/state/state_sizer_test.go b/core/state/state_sizer_test.go index b3203afd74..539f160985 100644 --- a/core/state/state_sizer_test.go +++ b/core/state/state_sizer_test.go @@ -160,7 +160,7 @@ func TestSizeTracker(t *testing.T) { } tracker.Notify(ret) - if err := tdb.Commit(ret.root, false); err != nil { + if err := tdb.Commit(ret.Root, false); err != nil { t.Fatalf("Failed to commit trie at block %d: %v", blockNum, err) } @@ -169,7 +169,7 @@ func TestSizeTracker(t *testing.T) { t.Fatalf("Failed to calculate size stats for block %d: %v", blockNum, err) } trackedUpdates = append(trackedUpdates, diff) - currentRoot = ret.root + currentRoot = ret.Root } finalRoot := rawdb.ReadSnapshotRoot(db) diff --git a/core/state/statedb.go b/core/state/statedb.go index 956871472d..8c57edf08e 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -1045,11 +1045,11 @@ func (s *StateDB) clearJournalAndRefund() { } // deleteStorage is designed to delete the storage trie of a designated account. 
-func (s *StateDB) deleteStorage(addrHash common.Hash, root common.Hash) (map[common.Hash][]byte, map[common.Hash][]byte, *trienode.NodeSet, error) { +func (s *StateDB) deleteStorage(addrHash common.Hash, root common.Hash) (map[common.Hash]common.Hash, map[common.Hash]common.Hash, *trienode.NodeSet, error) { var ( - nodes = trienode.NewNodeSet(addrHash) // the set for trie node mutations (value is nil) - storages = make(map[common.Hash][]byte) // the set for storage mutations (value is nil) - storageOrigins = make(map[common.Hash][]byte) // the set for tracking the original value of slot + nodes = trienode.NewNodeSet(addrHash) // the set for trie node mutations (value is nil) + storages = make(map[common.Hash]common.Hash) // the set for storage mutations (value is nil) + storageOrigins = make(map[common.Hash]common.Hash) // the set for tracking the original value of slot ) iteratee, err := s.db.Iteratee(s.originalRoot) if err != nil { @@ -1065,19 +1065,24 @@ func (s *StateDB) deleteStorage(addrHash common.Hash, root common.Hash) (map[com nodes.AddNode(path, trienode.NewDeletedWithPrev(blob)) }) for it.Next() { - slot := common.CopyBytes(it.Slot()) - if err := it.Error(); err != nil { // error might occur after Slot function + slot := it.Slot() + // Error might occur after Slot function + if err := it.Error(); err != nil { return nil, nil, nil, err } + if slot == (common.Hash{}) { + return nil, nil, nil, fmt.Errorf("unexpected empty storage slot, addr: %x, slot: %x", addrHash, it.Hash()) + } key := it.Hash() - storages[key] = nil + storages[key] = common.Hash{} storageOrigins[key] = slot - if err := stack.Update(key.Bytes(), slot); err != nil { + if err := stack.Update(key.Bytes(), encodeSlot(slot)); err != nil { return nil, nil, nil, err } } - if err := it.Error(); err != nil { // error might occur during iteration + // Error might occur during iteration + if err := it.Error(); err != nil { return nil, nil, nil, err } if stack.Hash() != root { @@ -1104,10 +1109,10 
@@ func (s *StateDB) deleteStorage(addrHash common.Hash, root common.Hash) (map[com // with their values be tracked as original value. // In case (d), **original** account along with its storages should be deleted, // with their values be tracked as original value. -func (s *StateDB) handleDestruction(noStorageWiping bool) (map[common.Hash]*accountDelete, []*trienode.NodeSet, error) { +func (s *StateDB) handleDestruction(noStorageWiping bool) (map[common.Hash]*AccountDelete, []*trienode.NodeSet, error) { var ( nodes []*trienode.NodeSet - deletes = make(map[common.Hash]*accountDelete) + deletes = make(map[common.Hash]*AccountDelete) ) for addr, prevObj := range s.stateObjectsDestruct { prev := prevObj.origin @@ -1122,9 +1127,9 @@ func (s *StateDB) handleDestruction(noStorageWiping bool) (map[common.Hash]*acco } // The account was existent, it can be either case (c) or (d). addrHash := crypto.Keccak256Hash(addr.Bytes()) - op := &accountDelete{ - address: addr, - origin: types.SlimAccountRLP(*prev), + op := &AccountDelete{ + Address: addr, + Origin: prev, } deletes[addrHash] = op @@ -1140,8 +1145,8 @@ func (s *StateDB) handleDestruction(noStorageWiping bool) (map[common.Hash]*acco if err != nil { return nil, nil, fmt.Errorf("failed to delete storage, err: %w", err) } - op.storages = storages - op.storagesOrigin = storagesOrigin + op.Storages = storages + op.StoragesOrigin = storagesOrigin // Aggregate the associated trie node changes. nodes = append(nodes, set) @@ -1156,7 +1161,7 @@ func (s *StateDB) GetTrie() Trie { // commit gathers the state mutations accumulated along with the associated // trie changes, resetting all internal flags with the new state as the base. -func (s *StateDB) commit(deleteEmptyObjects bool, noStorageWiping bool, blockNumber uint64) (*stateUpdate, error) { +func (s *StateDB) commit(deleteEmptyObjects bool, noStorageWiping bool, blockNumber uint64) (*StateUpdate, error) { // Short circuit in case any database failure occurred earlier. 
if s.dbErr != nil { return nil, fmt.Errorf("commit aborted due to earlier error: %v", s.dbErr) @@ -1177,7 +1182,7 @@ func (s *StateDB) commit(deleteEmptyObjects bool, noStorageWiping bool, blockNum lock sync.Mutex // protect two maps below nodes = trienode.NewMergedNodeSet() // aggregated trie nodes - updates = make(map[common.Hash]*accountUpdate, len(s.mutations)) // aggregated account updates + updates = make(map[common.Hash]*AccountUpdate, len(s.mutations)) // aggregated account updates // merge aggregates the dirty trie nodes into the global set. // @@ -1305,12 +1310,16 @@ func (s *StateDB) commit(deleteEmptyObjects bool, noStorageWiping bool, blockNum origin := s.originalRoot s.originalRoot = root - return newStateUpdate(noStorageWiping, origin, root, blockNumber, deletes, updates, nodes), nil + typ := StorageKeyHashed + if noStorageWiping { + typ = StorageKeyPlain + } + return NewStateUpdate(typ, origin, root, blockNumber, deletes, updates, nodes), nil } // commitAndFlush is a wrapper of commit which also commits the state mutations // to the configured data stores. -func (s *StateDB) commitAndFlush(block uint64, deleteEmptyObjects bool, noStorageWiping bool, deriveCodeFields bool) (*stateUpdate, error) { +func (s *StateDB) commitAndFlush(block uint64, deleteEmptyObjects bool, noStorageWiping bool, deriveCodeFields bool) (*StateUpdate, error) { ret, err := s.commit(deleteEmptyObjects, noStorageWiping, block) if err != nil { return nil, err @@ -1351,17 +1360,17 @@ func (s *StateDB) Commit(block uint64, deleteEmptyObjects bool, noStorageWiping if err != nil { return common.Hash{}, err } - return ret.root, nil + return ret.Root, nil } // CommitWithUpdate writes the state mutations and returns the state update for // external processing (e.g., live tracing hooks or size tracker). 
-func (s *StateDB) CommitWithUpdate(block uint64, deleteEmptyObjects bool, noStorageWiping bool) (common.Hash, *stateUpdate, error) { +func (s *StateDB) CommitWithUpdate(block uint64, deleteEmptyObjects bool, noStorageWiping bool) (common.Hash, *StateUpdate, error) { ret, err := s.commitAndFlush(block, deleteEmptyObjects, noStorageWiping, true) if err != nil { return common.Hash{}, nil, err } - return ret.root, ret, nil + return ret.Root, ret, nil } // Prepare handles the preparatory steps for executing a state transition with. diff --git a/core/state/statedb_fuzz_test.go b/core/state/statedb_fuzz_test.go index a8017e5568..c796b416a3 100644 --- a/core/state/statedb_fuzz_test.go +++ b/core/state/statedb_fuzz_test.go @@ -182,11 +182,12 @@ func (test *stateTest) run() bool { accountOrigin []map[common.Address][]byte storages []map[common.Hash]map[common.Hash][]byte storageOrigin []map[common.Address]map[common.Hash][]byte - copyUpdate = func(update *stateUpdate) { - accounts = append(accounts, maps.Clone(update.accounts)) - accountOrigin = append(accountOrigin, maps.Clone(update.accountsOrigin)) - storages = append(storages, maps.Clone(update.storages)) - storageOrigin = append(storageOrigin, maps.Clone(update.storagesOrigin)) + copyUpdate = func(update *StateUpdate) { + accts, acctOrigin, slots, slotOrigin := update.EncodeMPTState() + accounts = append(accounts, maps.Clone(accts)) + accountOrigin = append(accountOrigin, maps.Clone(acctOrigin)) + storages = append(storages, maps.Clone(slots)) + storageOrigin = append(storageOrigin, maps.Clone(slotOrigin)) } disk = rawdb.NewMemoryDatabase() tdb = triedb.NewDatabase(disk, &triedb.Config{PathDB: pathdb.Defaults}) @@ -232,11 +233,11 @@ func (test *stateTest) run() bool { if err != nil { panic(err) } - if ret.empty() { + if ret.Empty() { return true } copyUpdate(ret) - roots = append(roots, ret.root) + roots = append(roots, ret.Root) } for i := 0; i < len(test.actions); i++ { root := types.EmptyRootHash diff --git 
a/core/state/stateupdate.go b/core/state/stateupdate.go index 1c171cbd5e..6f488595eb 100644 --- a/core/state/stateupdate.go +++ b/core/state/stateupdate.go @@ -26,139 +26,148 @@ import ( "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie/trienode" - "github.com/ethereum/go-ethereum/triedb" ) -// contractCode represents contract bytecode along with its associated metadata. -type contractCode struct { - hash common.Hash // hash is the cryptographic hash of the current contract code. - blob []byte // blob is the binary representation of the current contract code. - originHash common.Hash // originHash is the cryptographic hash of the code before mutation. +// ContractCode represents contract bytecode mutation along with its +// associated metadata. +type ContractCode struct { + Hash common.Hash // Hash is the cryptographic hash of the current contract code. + Blob []byte // Blob is the binary representation of the current contract code. + OriginHash common.Hash // OriginHash is the cryptographic hash of the code before mutation. // Derived fields, populated only when state tracking is enabled. - duplicate bool // duplicate indicates whether the updated code already exists. - originBlob []byte // originBlob is the original binary representation of the contract code. + Duplicate bool // Duplicate indicates whether the updated code already exists. + OriginBlob []byte // OriginBlob is the original binary representation of the contract code. } -// accountDelete represents an operation for deleting an Ethereum account. -type accountDelete struct { - address common.Address // address is the unique account identifier - origin []byte // origin is the original value of account data in slim-RLP encoding. - - // storages stores mutated slots, the value should be nil. - storages map[common.Hash][]byte - - // storagesOrigin stores the original values of mutated slots in - // prefix-zero-trimmed RLP format. 
The map key refers to the **HASH** - // of the raw storage slot key. - storagesOrigin map[common.Hash][]byte +// AccountDelete represents a deletion operation for an Ethereum account. +type AccountDelete struct { + Address common.Address // Address uniquely identifies the account. + Origin *types.StateAccount // Origin is the account state prior to deletion (never nil). + Storages map[common.Hash]common.Hash // Storages contains mutated storage slots. + StoragesOrigin map[common.Hash]common.Hash // StoragesOrigin holds original values of mutated slots; keys are hashes of raw storage slot keys. } -// accountUpdate represents an operation for updating an Ethereum account. -type accountUpdate struct { - address common.Address // address is the unique account identifier - data []byte // data is the slim-RLP encoded account data. - origin []byte // origin is the original value of account data in slim-RLP encoding. - code *contractCode // code represents mutated contract code; nil means it's not modified. - storages map[common.Hash][]byte // storages stores mutated slots in prefix-zero-trimmed RLP format. +// AccountUpdate represents an update operation for an Ethereum account. +type AccountUpdate struct { + Address common.Address // Address uniquely identifies the account. + Data *types.StateAccount // Data is the updated account state; nil indicates deletion. + Origin *types.StateAccount // Origin is the previous account state; nil indicates non-existence. + Code *ContractCode // Code contains updated contract code; nil if unchanged. + Storages map[common.Hash]common.Hash // Storages contains updated storage slots. - // storagesOriginByKey and storagesOriginByHash both store the original values - // of mutated slots in prefix-zero-trimmed RLP format. The difference is that - // storagesOriginByKey uses the **raw** storage slot key as the map ID, while - // storagesOriginByHash uses the **hash** of the storage slot key instead.
- storagesOriginByKey map[common.Hash][]byte - storagesOriginByHash map[common.Hash][]byte + // StoragesOriginByKey and StoragesOriginByHash both record original values + // of mutated storage slots: + // - StoragesOriginByKey uses raw storage slot keys. + // - StoragesOriginByHash uses hashed storage slot keys. + StoragesOriginByKey map[common.Hash]common.Hash + StoragesOriginByHash map[common.Hash]common.Hash } -// stateUpdate represents the difference between two states resulting from state +// StorageKeyEncoding specifies the encoding scheme of a storage key. +type StorageKeyEncoding int + +const ( + // StorageKeyHashed represents a hashed key (e.g. Keccak256). + StorageKeyHashed StorageKeyEncoding = iota + + // StorageKeyPlain represents a raw (unhashed) key. + StorageKeyPlain +) + +// StateUpdate represents the difference between two states resulting from state // execution. It contains information about mutated contract codes, accounts, // and storage slots, along with their original values. -type stateUpdate struct { - originRoot common.Hash // hash of the state before applying mutation - root common.Hash // hash of the state after applying mutation - blockNumber uint64 // Associated block number +type StateUpdate struct { + OriginRoot common.Hash // Hash of the state before applying mutation + Root common.Hash // Hash of the state after applying mutation + BlockNumber uint64 // Associated block number - accounts map[common.Hash][]byte // accounts stores mutated accounts in 'slim RLP' encoding - accountsOrigin map[common.Address][]byte // accountsOrigin stores the original values of mutated accounts in 'slim RLP' encoding + // Accounts contains mutated accounts, keyed by address hash. + Accounts map[common.Hash]*types.StateAccount - // storages stores mutated slots in 'prefix-zero-trimmed' RLP format. - // The value is keyed by account hash and **storage slot key hash**. 
- storages map[common.Hash]map[common.Hash][]byte + // Storages contains mutated storage slots, keyed by address + // hash and storage slot key hash. + Storages map[common.Hash]map[common.Hash]common.Hash - // storagesOrigin stores the original values of mutated slots in - // 'prefix-zero-trimmed' RLP format. - // (a) the value is keyed by account hash and **storage slot key** if rawStorageKey is true; - // (b) the value is keyed by account hash and **storage slot key hash** if rawStorageKey is false; - storagesOrigin map[common.Address]map[common.Hash][]byte - rawStorageKey bool + // AccountsOrigin holds the original values of mutated accounts, keyed by address. + AccountsOrigin map[common.Address]*types.StateAccount - codes map[common.Address]*contractCode // codes contains the set of dirty codes - nodes *trienode.MergedNodeSet // Aggregated dirty nodes caused by state changes + // StoragesOrigin holds the original values of mutated storage slots. + // The key format depends on StorageKeyType: + // - if StorageKeyType is plain: keyed by account address and plain storage slot key. + // - if StorageKeyType is hashed: keyed by account address and storage slot key hash. + StoragesOrigin map[common.Address]map[common.Hash]common.Hash + StorageKeyType StorageKeyEncoding + + Codes map[common.Address]*ContractCode // Codes contains the set of dirty codes + Nodes *trienode.MergedNodeSet // Aggregated dirty nodes caused by state changes } -// empty returns a flag indicating the state transition is empty or not. -func (sc *stateUpdate) empty() bool { - return sc.originRoot == sc.root +// Empty returns a flag indicating the state transition is empty or not. +func (sc *StateUpdate) Empty() bool { + return sc.OriginRoot == sc.Root } -// newStateUpdate constructs a state update object by identifying the differences +// NewStateUpdate constructs a state update object by identifying the differences // between two states through state execution. 
It combines the specified account // deletions and account updates to create a complete state update. -// -// rawStorageKey is a flag indicating whether to use the raw storage slot key or -// the hash of the slot key for constructing state update object. -func newStateUpdate(rawStorageKey bool, originRoot common.Hash, root common.Hash, blockNumber uint64, deletes map[common.Hash]*accountDelete, updates map[common.Hash]*accountUpdate, nodes *trienode.MergedNodeSet) *stateUpdate { +func NewStateUpdate(typ StorageKeyEncoding, originRoot common.Hash, root common.Hash, blockNumber uint64, deletes map[common.Hash]*AccountDelete, updates map[common.Hash]*AccountUpdate, nodes *trienode.MergedNodeSet) *StateUpdate { var ( - accounts = make(map[common.Hash][]byte) - accountsOrigin = make(map[common.Address][]byte) - storages = make(map[common.Hash]map[common.Hash][]byte) - storagesOrigin = make(map[common.Address]map[common.Hash][]byte) - codes = make(map[common.Address]*contractCode) + accounts = make(map[common.Hash]*types.StateAccount) + accountsOrigin = make(map[common.Address]*types.StateAccount) + storages = make(map[common.Hash]map[common.Hash]common.Hash) + storagesOrigin = make(map[common.Address]map[common.Hash]common.Hash) + codes = make(map[common.Address]*ContractCode) ) - // Since some accounts might be destroyed and recreated within the same + // Since some accounts might be deleted and recreated within the same // block, deletions must be aggregated first. for addrHash, op := range deletes { - addr := op.address + addr := op.Address accounts[addrHash] = nil - accountsOrigin[addr] = op.origin + accountsOrigin[addr] = op.Origin - // If storage wiping exists, the hash of the storage slot key must be used - if len(op.storages) > 0 { - storages[addrHash] = op.storages + // If storage wiping is present, the StorageKeyEncoding MUST be hashed. 
+ // Deleted storage slots are iterated from either the trie or the flat + // state snapshot, both of which use the storage slot hash as the identifier. + // Fortunately, storage wiping is no longer practical after the Cancun fork + // and is not expected to occur. + if len(op.Storages) > 0 { + storages[addrHash] = op.Storages } - if len(op.storagesOrigin) > 0 { - storagesOrigin[addr] = op.storagesOrigin + if len(op.StoragesOrigin) > 0 { + storagesOrigin[addr] = op.StoragesOrigin } } // Aggregate account updates then. for addrHash, op := range updates { // Aggregate dirty contract codes if they are available. - addr := op.address - if op.code != nil { - codes[addr] = op.code + addr := op.Address + if op.Code != nil { + codes[addr] = op.Code } - accounts[addrHash] = op.data + accounts[addrHash] = op.Data // Aggregate the account original value. If the account is already - // present in the aggregated accountsOrigin set, skip it. + // present in the aggregated AccountsOrigin set, skip it. if _, found := accountsOrigin[addr]; !found { - accountsOrigin[addr] = op.origin + accountsOrigin[addr] = op.Origin } // Aggregate the storage mutation list. If a slot in op.storages is // already present in aggregated storages set, the value will be // overwritten. - if len(op.storages) > 0 { + if len(op.Storages) > 0 { if _, exist := storages[addrHash]; !exist { - storages[addrHash] = op.storages + storages[addrHash] = op.Storages } else { - maps.Copy(storages[addrHash], op.storages) + maps.Copy(storages[addrHash], op.Storages) } } // Aggregate the storage original values. If the slot is already present - // in aggregated storagesOrigin set, skip it. - storageOriginSet := op.storagesOriginByHash - if rawStorageKey { - storageOriginSet = op.storagesOriginByKey + // in aggregated StoragesOrigin set, skip it. 
+ storageOriginSet := op.StoragesOriginByHash + if typ == StorageKeyPlain { + storageOriginSet = op.StoragesOriginByKey } if len(storageOriginSet) > 0 { origin, exist := storagesOrigin[addr] @@ -173,32 +182,114 @@ func newStateUpdate(rawStorageKey bool, originRoot common.Hash, root common.Hash } } } - return &stateUpdate{ - originRoot: originRoot, - root: root, - blockNumber: blockNumber, - accounts: accounts, - accountsOrigin: accountsOrigin, - storages: storages, - storagesOrigin: storagesOrigin, - rawStorageKey: rawStorageKey, - codes: codes, - nodes: nodes, + return &StateUpdate{ + OriginRoot: originRoot, + Root: root, + BlockNumber: blockNumber, + Accounts: accounts, + AccountsOrigin: accountsOrigin, + Storages: storages, + StoragesOrigin: storagesOrigin, + StorageKeyType: typ, + Codes: codes, + Nodes: nodes, } } -// stateSet converts the current stateUpdate object into a triedb.StateSet -// object. This function extracts the necessary data from the stateUpdate -// struct and formats it into the StateSet structure consumed by the triedb -// package. -func (sc *stateUpdate) stateSet() *triedb.StateSet { - return &triedb.StateSet{ - Accounts: sc.accounts, - AccountsOrigin: sc.accountsOrigin, - Storages: sc.storages, - StoragesOrigin: sc.storagesOrigin, - RawStorageKey: sc.rawStorageKey, +// encodeSlot encodes the storage slot value by trimming all leading zeros +// and then RLP-encoding the result. +func encodeSlot(value common.Hash) []byte { + if value == (common.Hash{}) { + return nil } + blob, _ := rlp.EncodeToBytes(common.TrimLeftZeroes(value[:])) + return blob +} + +// EncodeMPTState encodes all state mutations alongside their original value +// into the Merkle-Patricia-Trie representation. +// +// It transforms account and storage updates into their corresponding MPT-encoded +// key-value mappings, using the same encoding rules as the Ethereum state trie. 
+func (sc *StateUpdate) EncodeMPTState() (map[common.Hash][]byte, map[common.Address][]byte, map[common.Hash]map[common.Hash][]byte, map[common.Address]map[common.Hash][]byte) { + var ( + accounts = make(map[common.Hash][]byte, len(sc.Accounts)) + storages = make(map[common.Hash]map[common.Hash][]byte, len(sc.Storages)) + accountOrigin = make(map[common.Address][]byte, len(sc.AccountsOrigin)) + storageOrigin = make(map[common.Address]map[common.Hash][]byte, len(sc.StoragesOrigin)) + ) + for addr, prev := range sc.AccountsOrigin { + if prev == nil { + accountOrigin[addr] = nil + } else { + accountOrigin[addr] = types.SlimAccountRLP(*prev) + } + } + for addrHash, data := range sc.Accounts { + if data == nil { + accounts[addrHash] = nil + } else { + accounts[addrHash] = types.SlimAccountRLP(*data) + } + } + for addr, slots := range sc.StoragesOrigin { + subset := make(map[common.Hash][]byte) + for key, val := range slots { + subset[key] = encodeSlot(val) + } + storageOrigin[addr] = subset + } + for addrHash, slots := range sc.Storages { + subset := make(map[common.Hash][]byte) + for key, val := range slots { + subset[key] = encodeSlot(val) + } + storages[addrHash] = subset + } + return accounts, accountOrigin, storages, storageOrigin +} + +// EncodeUBTState encodes all state mutations alongside their original value +// into the Unified-Binary-Trie representation. +// +// It transforms account and storage updates into their corresponding UBT-encoded +// key-value mappings, using the same encoding rules as the Ethereum state trie. 
+func (sc *StateUpdate) EncodeUBTState() (map[common.Hash][]byte, map[common.Address][]byte, map[common.Hash]map[common.Hash][]byte, map[common.Address]map[common.Hash][]byte) { + var ( + accounts = make(map[common.Hash][]byte, len(sc.Accounts)) + storages = make(map[common.Hash]map[common.Hash][]byte, len(sc.Storages)) + accountOrigin = make(map[common.Address][]byte, len(sc.AccountsOrigin)) + storageOrigin = make(map[common.Address]map[common.Hash][]byte, len(sc.StoragesOrigin)) + ) + for addr, prev := range sc.AccountsOrigin { + if prev == nil { + accountOrigin[addr] = nil + } else { + accountOrigin[addr] = types.SlimAccountRLP(*prev) + } + } + for addrHash, data := range sc.Accounts { + if data == nil { + accounts[addrHash] = nil + } else { + accounts[addrHash] = types.SlimAccountRLP(*data) + } + } + for addr, slots := range sc.StoragesOrigin { + subset := make(map[common.Hash][]byte) + for key, val := range slots { + subset[key] = encodeSlot(val) + } + storageOrigin[addr] = subset + } + for addrHash, slots := range sc.Storages { + subset := make(map[common.Hash][]byte) + for key, val := range slots { + subset[key] = encodeSlot(val) + } + storages[addrHash] = subset + } + return accounts, accountOrigin, storages, storageOrigin } // deriveCodeFields derives the missing fields of contract code changes @@ -207,135 +298,96 @@ func (sc *stateUpdate) stateSet() *triedb.StateSet { // Note: This operation is expensive and not needed during normal state // transitions. It is only required when SizeTracker or StateUpdate hook // is enabled to produce accurate state statistics. 
-func (sc *stateUpdate) deriveCodeFields(reader ContractCodeReader) error { +func (sc *StateUpdate) deriveCodeFields(reader ContractCodeReader) error { cache := make(map[common.Hash]bool) - for addr, code := range sc.codes { - if code.originHash != types.EmptyCodeHash { - blob := reader.Code(addr, code.originHash) + for addr, code := range sc.Codes { + if code.OriginHash != types.EmptyCodeHash { + blob := reader.Code(addr, code.OriginHash) if len(blob) == 0 { return fmt.Errorf("original code of %x is empty", addr) } - code.originBlob = blob + code.OriginBlob = blob } - if exists, ok := cache[code.hash]; ok { - code.duplicate = exists + if exists, ok := cache[code.Hash]; ok { + code.Duplicate = exists continue } - res := reader.Has(addr, code.hash) - cache[code.hash] = res - code.duplicate = res + res := reader.Has(addr, code.Hash) + cache[code.Hash] = res + code.Duplicate = res } return nil } -// ToTracingUpdate converts the internal stateUpdate to an exported tracing.StateUpdate. -func (sc *stateUpdate) ToTracingUpdate() (*tracing.StateUpdate, error) { +// ToTracingUpdate converts the internal StateUpdate to an exported tracing.StateUpdate. 
+func (sc *StateUpdate) ToTracingUpdate() (*tracing.StateUpdate, error) { update := &tracing.StateUpdate{ - OriginRoot: sc.originRoot, - Root: sc.root, - BlockNumber: sc.blockNumber, - AccountChanges: make(map[common.Address]*tracing.AccountChange, len(sc.accountsOrigin)), + OriginRoot: sc.OriginRoot, + Root: sc.Root, + BlockNumber: sc.BlockNumber, + AccountChanges: make(map[common.Address]*tracing.AccountChange, len(sc.AccountsOrigin)), StorageChanges: make(map[common.Address]map[common.Hash]*tracing.StorageChange), - CodeChanges: make(map[common.Address]*tracing.CodeChange, len(sc.codes)), + CodeChanges: make(map[common.Address]*tracing.CodeChange, len(sc.Codes)), TrieChanges: make(map[common.Hash]map[string]*tracing.TrieNodeChange), } // Gather all account changes - for addr, oldData := range sc.accountsOrigin { + for addr, oldData := range sc.AccountsOrigin { addrHash := crypto.Keccak256Hash(addr.Bytes()) - newData, exists := sc.accounts[addrHash] + newData, exists := sc.Accounts[addrHash] if !exists { return nil, fmt.Errorf("account %x not found", addr) } - change := &tracing.AccountChange{} - - if len(oldData) > 0 { - acct, err := types.FullAccount(oldData) - if err != nil { - return nil, err - } - change.Prev = &types.StateAccount{ - Nonce: acct.Nonce, - Balance: acct.Balance, - Root: acct.Root, - CodeHash: acct.CodeHash, - } - } - if len(newData) > 0 { - acct, err := types.FullAccount(newData) - if err != nil { - return nil, err - } - change.New = &types.StateAccount{ - Nonce: acct.Nonce, - Balance: acct.Balance, - Root: acct.Root, - CodeHash: acct.CodeHash, - } + change := &tracing.AccountChange{ + Prev: oldData, + New: newData, } update.AccountChanges[addr] = change } // Gather all storage slot changes - for addr, slots := range sc.storagesOrigin { + for addr, slots := range sc.StoragesOrigin { addrHash := crypto.Keccak256Hash(addr.Bytes()) - subset, exists := sc.storages[addrHash] + subset, exists := sc.Storages[addrHash] if !exists { return nil, 
fmt.Errorf("storage %x not found", addr) } storageChanges := make(map[common.Hash]*tracing.StorageChange, len(slots)) - for key, encPrev := range slots { + for key, oldData := range slots { // Get new value - handle both raw and hashed key formats var ( exists bool - encNew []byte - decPrev []byte - decNew []byte - err error + newData common.Hash ) - if sc.rawStorageKey { - encNew, exists = subset[crypto.Keccak256Hash(key.Bytes())] + if sc.StorageKeyType == StorageKeyPlain { + newData, exists = subset[crypto.Keccak256Hash(key.Bytes())] } else { - encNew, exists = subset[key] + newData, exists = subset[key] } if !exists { return nil, fmt.Errorf("storage slot %x-%x not found", addr, key) } - - // Decode the prev and new values - if len(encPrev) > 0 { - _, decPrev, _, err = rlp.Split(encPrev) - if err != nil { - return nil, fmt.Errorf("failed to decode prevValue: %v", err) - } - } - if len(encNew) > 0 { - _, decNew, _, err = rlp.Split(encNew) - if err != nil { - return nil, fmt.Errorf("failed to decode newValue: %v", err) - } - } storageChanges[key] = &tracing.StorageChange{ - Prev: common.BytesToHash(decPrev), - New: common.BytesToHash(decNew), + Prev: oldData, + New: newData, } } update.StorageChanges[addr] = storageChanges } // Gather all contract code changes - for addr, code := range sc.codes { + for addr, code := range sc.Codes { change := &tracing.CodeChange{ New: &tracing.ContractCode{ - Hash: code.hash, - Code: code.blob, - Exists: code.duplicate, + Hash: code.Hash, + Code: code.Blob, + Exists: code.Duplicate, }, } - if code.originHash != types.EmptyCodeHash { + if code.OriginHash != types.EmptyCodeHash { change.Prev = &tracing.ContractCode{ - Hash: code.originHash, - Code: code.originBlob, + Hash: code.OriginHash, + Code: code.OriginBlob, Exists: true, } } @@ -343,8 +395,8 @@ func (sc *stateUpdate) ToTracingUpdate() (*tracing.StateUpdate, error) { } // Gather all trie node changes - if sc.nodes != nil { - for owner, subset := range sc.nodes.Sets { + if 
sc.Nodes != nil { + for owner, subset := range sc.Nodes.Sets { nodeChanges := make(map[string]*tracing.TrieNodeChange, len(subset.Origins)) for path, oldNode := range subset.Origins { newNode, exists := subset.Nodes[path]