diff --git a/triedb/generate.go b/triedb/generate.go new file mode 100644 index 0000000000..259e139848 --- /dev/null +++ b/triedb/generate.go @@ -0,0 +1,108 @@ +// Copyright 2026 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package triedb + +import ( + "fmt" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/triedb/internal" +) + +// kvAccountIterator wraps an ethdb.Iterator to iterate over account snapshot +// entries in the database, implementing internal.AccountIterator. 
+type kvAccountIterator struct {
+	inner ethdb.Iterator // database iterator positioned on account snapshot keys
+	hash  common.Hash    // account hash taken from the key of the current entry
+}
+
+// newKVAccountIterator opens an iterator over the keys that start with
+// rawdb.SnapshotAccountPrefix, wrapped by rawdb.NewKeyLengthIterator with a
+// required key length of prefix plus one hash.
+func newKVAccountIterator(db ethdb.Iteratee) *kvAccountIterator {
+	keyLen := len(rawdb.SnapshotAccountPrefix) + common.HashLength
+	raw := db.NewIterator(rawdb.SnapshotAccountPrefix, nil)
+	return &kvAccountIterator{inner: rawdb.NewKeyLengthIterator(raw, keyLen)}
+}
+
+// Next advances to the following entry and caches the hash that trails the
+// prefix in its key. It returns false once the wrapped iterator stops.
+func (a *kvAccountIterator) Next() bool {
+	if a.inner.Next() {
+		copy(a.hash[:], a.inner.Key()[len(rawdb.SnapshotAccountPrefix):])
+		return true
+	}
+	return false
+}
+
+// Hash returns the account hash cached by the last successful Next.
+func (a *kvAccountIterator) Hash() common.Hash {
+	return a.hash
+}
+
+// Account returns the value stored under the current key.
+func (a *kvAccountIterator) Account() []byte {
+	return a.inner.Value()
+}
+
+// Error forwards the error state of the wrapped iterator.
+func (a *kvAccountIterator) Error() error {
+	return a.inner.Error()
+}
+
+// Release releases the wrapped iterator.
+func (a *kvAccountIterator) Release() {
+	a.inner.Release()
+}
+
+// kvStorageIterator wraps an ethdb.Iterator to iterate over storage snapshot
+// entries for a specific account, implementing internal.StorageIterator.
+type kvStorageIterator struct {
+	inner ethdb.Iterator // iterator obtained from rawdb.IterateStorageSnapshots
+	hash  common.Hash    // slot hash taken from the key of the current entry
+}
+
+// newKVStorageIterator opens an iterator over the storage snapshot entries of
+// the account identified by accountHash.
+func newKVStorageIterator(db ethdb.Iteratee, accountHash common.Hash) *kvStorageIterator {
+	return &kvStorageIterator{inner: rawdb.IterateStorageSnapshots(db, accountHash)}
+}
+
+// Next advances to the following entry and caches the slot hash, which sits
+// in the key after the storage prefix and the account hash.
+func (s *kvStorageIterator) Next() bool {
+	if s.inner.Next() {
+		slotOffset := len(rawdb.SnapshotStoragePrefix) + common.HashLength
+		copy(s.hash[:], s.inner.Key()[slotOffset:])
+		return true
+	}
+	return false
+}
+
+// Hash returns the slot hash cached by the last successful Next.
+func (s *kvStorageIterator) Hash() common.Hash {
+	return s.hash
+}
+
+// Slot returns the value stored under the current key.
+func (s *kvStorageIterator) Slot() []byte {
+	return s.inner.Value()
+}
+
+// Error forwards the error state of the wrapped iterator.
+func (s *kvStorageIterator) Error() error {
+	return s.inner.Error()
+}
+
+// Release releases the wrapped iterator.
+func (s *kvStorageIterator) Release() {
+	s.inner.Release()
+}
+
+// GenerateTrie rebuilds all tries (storage + account) from flat snapshot data
+// in the database. It reads account and storage snapshots from the KV store,
+// builds tries using StackTrie with streaming node writes, and verifies the
+// computed state root matches the expected root.
+func GenerateTrie(db ethdb.Database, scheme string, root common.Hash) error {
+	// buildStorage regenerates the storage trie of one account from its flat
+	// slots, emitting nodes through dst, and hands back the resulting root so
+	// that it can be compared against the root recorded in the account.
+	buildStorage := func(dst ethdb.KeyValueWriter, accountHash, codeHash common.Hash, stat *internal.GenerateStats) (common.Hash, error) {
+		slots := newKVStorageIterator(db, accountHash)
+		defer slots.Release()
+
+		storageRoot, err := internal.GenerateTrieRoot(dst, scheme, slots, accountHash, internal.StackTrieGenerate, nil, stat, false)
+		if err != nil {
+			return common.Hash{}, err
+		}
+		return storageRoot, nil
+	}
+	accounts := newKVAccountIterator(db)
+	defer accounts.Release()
+
+	// The zero hash selects account mode; progress reporting is enabled only
+	// for this top-level run.
+	computed, err := internal.GenerateTrieRoot(db, scheme, accounts, common.Hash{}, internal.StackTrieGenerate, buildStorage, internal.NewGenerateStats(), true)
+	switch {
+	case err != nil:
+		return err
+	case computed != root:
+		return fmt.Errorf("state root mismatch: got %x, want %x", computed, root)
+	default:
+		return nil
+	}
+}
diff --git a/triedb/generate_test.go b/triedb/generate_test.go
new file mode 100644
index 0000000000..42bccd9aa3
--- /dev/null
+++ b/triedb/generate_test.go
@@ -0,0 +1,178 @@
+// Copyright 2026 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package triedb
+
+import (
+	"bytes"
+	"sort"
+	"testing"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/core/rawdb"
+	"github.com/ethereum/go-ethereum/core/types"
+	"github.com/ethereum/go-ethereum/rlp"
+	"github.com/ethereum/go-ethereum/trie"
+	"github.com/holiman/uint256"
+)
+
+// testAccount bundles everything a test needs to describe one account: the
+// hash it is stored under, the account body and its optional storage slots.
+type testAccount struct {
+	hash    common.Hash
+	account types.StateAccount
+	storage []testSlot // must be sorted by hash
+}
+
+// testSlot is a single storage entry keyed by its hash.
+type testSlot struct {
+	hash  common.Hash
+	value []byte
+}
+
+// buildExpectedRoot returns the account trie root for the given accounts by
+// feeding their RLP encodings into a StackTrie. The slice is sorted by hash in
+// place first, so the caller's slice is reordered as a side effect.
+func buildExpectedRoot(t *testing.T, accounts []testAccount) common.Hash {
+	t.Helper()
+
+	// StackTrie requires sorted key insertion.
+	byHash := func(i, j int) bool {
+		return bytes.Compare(accounts[i].hash[:], accounts[j].hash[:]) < 0
+	}
+	sort.Slice(accounts, byHash)
+
+	st := trie.NewStackTrie(nil)
+	for _, acc := range accounts {
+		enc, err := rlp.EncodeToBytes(&acc.account)
+		if err != nil {
+			t.Fatal(err)
+		}
+		st.Update(acc.hash[:], enc)
+	}
+	return st.Hash()
+}
+
+// computeStorageRoot computes the storage trie root from sorted slots.
+func computeStorageRoot(slots []testSlot) common.Hash {
+	// The slots are ordered by hash in place before being inserted.
+	less := func(i, j int) bool {
+		return bytes.Compare(slots[i].hash[:], slots[j].hash[:]) < 0
+	}
+	sort.Slice(slots, less)
+
+	storageTrie := trie.NewStackTrie(nil)
+	for i := range slots {
+		storageTrie.Update(slots[i].hash[:], slots[i].value)
+	}
+	return storageTrie.Hash()
+}
+
+// TestGenerateTrieEmpty checks that a database without any snapshot entries
+// yields the empty root.
+func TestGenerateTrieEmpty(t *testing.T) {
+	err := GenerateTrie(rawdb.NewMemoryDatabase(), rawdb.HashScheme, types.EmptyRootHash)
+	if err != nil {
+		t.Fatalf("GenerateTrie on empty state failed: %v", err)
+	}
+}
+
+// TestGenerateTrieAccountsOnly checks generation for accounts that carry no
+// storage.
+func TestGenerateTrieAccountsOnly(t *testing.T) {
+	var (
+		db    = rawdb.NewMemoryDatabase()
+		first = testAccount{
+			hash: common.HexToHash("0x01"),
+			account: types.StateAccount{
+				Nonce:    1,
+				Balance:  uint256.NewInt(100),
+				Root:     types.EmptyRootHash,
+				CodeHash: types.EmptyCodeHash.Bytes(),
+			},
+		}
+		second = testAccount{
+			hash: common.HexToHash("0x02"),
+			account: types.StateAccount{
+				Nonce:    2,
+				Balance:  uint256.NewInt(200),
+				Root:     types.EmptyRootHash,
+				CodeHash: types.EmptyCodeHash.Bytes(),
+			},
+		}
+		accounts = []testAccount{first, second}
+	)
+	for i := range accounts {
+		rawdb.WriteAccountSnapshot(db, accounts[i].hash, types.SlimAccountRLP(accounts[i].account))
+	}
+	want := buildExpectedRoot(t, accounts)
+
+	if err := GenerateTrie(db, rawdb.HashScheme, want); err != nil {
+		t.Fatalf("GenerateTrie failed: %v", err)
+	}
+}
+
+// TestGenerateTrieWithStorage checks generation when one account owns storage
+// slots and another one does not.
+func TestGenerateTrieWithStorage(t *testing.T) {
+	db := rawdb.NewMemoryDatabase()
+
+	slots := []testSlot{
+		{hash: common.HexToHash("0xaa"), value: []byte{0x01, 0x02, 0x03}},
+		{hash: common.HexToHash("0xbb"), value: []byte{0x04, 0x05, 0x06}},
+	}
+	// The account must reference the root of exactly these slots.
+	storageRoot := computeStorageRoot(slots)
+
+	withStorage := testAccount{
+		hash: common.HexToHash("0x01"),
+		account: types.StateAccount{
+			Nonce:    1,
+			Balance:  uint256.NewInt(100),
+			Root:     storageRoot,
+			CodeHash: types.EmptyCodeHash.Bytes(),
+		},
+		storage: slots,
+	}
+	withoutStorage := testAccount{
+		hash: common.HexToHash("0x02"),
+		account: types.StateAccount{
+			Nonce:    0,
+			Balance:  uint256.NewInt(50),
+			Root:     types.EmptyRootHash,
+			CodeHash: types.EmptyCodeHash.Bytes(),
+		},
+	}
+	accounts := []testAccount{withStorage, withoutStorage}
+
+	// Write each account snapshot together with its storage snapshots.
+	for _, acc := range accounts {
+		rawdb.WriteAccountSnapshot(db, acc.hash, types.SlimAccountRLP(acc.account))
+		for _, slot := range acc.storage {
+			rawdb.WriteStorageSnapshot(db, acc.hash, slot.hash, slot.value)
+		}
+	}
+	want := buildExpectedRoot(t, accounts)
+
+	if err := GenerateTrie(db, rawdb.HashScheme, want); err != nil {
+		t.Fatalf("GenerateTrie failed: %v", err)
+	}
+}
+
+// TestGenerateTrieRootMismatch checks that a wrong expected root is reported
+// as an error.
+func TestGenerateTrieRootMismatch(t *testing.T) {
+	db := rawdb.NewMemoryDatabase()
+
+	only := types.StateAccount{
+		Nonce:    1,
+		Balance:  uint256.NewInt(100),
+		Root:     types.EmptyRootHash,
+		CodeHash: types.EmptyCodeHash.Bytes(),
+	}
+	rawdb.WriteAccountSnapshot(db, common.HexToHash("0x01"), types.SlimAccountRLP(only))
+
+	if err := GenerateTrie(db, rawdb.HashScheme, common.HexToHash("0xdeadbeef")); err == nil {
+		t.Fatal("expected error for root mismatch, got nil")
+	}
+}
diff --git a/triedb/internal/conversion.go b/triedb/internal/conversion.go
new file mode 100644
index 0000000000..b331b63e21
--- /dev/null
+++ b/triedb/internal/conversion.go
@@ -0,0 +1,363 @@
+// Copyright 2026 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+// Package internal contains shared trie generation utilities used by both
+// triedb and triedb/pathdb. All code is ported from
+// core/state/snapshot/conversion.go (with exported names) unless noted.
+package internal
+
+import (
+	"encoding/binary"
+	"fmt"
+	"math"
+	"runtime"
+	"sync"
+	"time"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/core/rawdb"
+	"github.com/ethereum/go-ethereum/core/types"
+	"github.com/ethereum/go-ethereum/ethdb"
+	"github.com/ethereum/go-ethereum/log"
+	"github.com/ethereum/go-ethereum/rlp"
+	"github.com/ethereum/go-ethereum/trie"
+)
+
+// Iterator is an iterator to step over all the accounts or the specific
+// storage in a snapshot which may or may not be composed of multiple layers.
+type Iterator interface {
+	// Next steps the iterator forward one element. It returns false both when
+	// the iterator is exhausted and when iteration failed for some reason
+	// (e.g. root being iterated becomes stale and garbage collected); Next
+	// itself carries no error, so callers use Error to tell the two apart.
+	Next() bool
+
+	// Error returns any failure that occurred during iteration, which might have
+	// caused a premature iteration exit (e.g. snapshot stack becoming stale).
+	Error() error
+
+	// Hash returns the hash of the account or storage slot the iterator is
+	// currently at.
+	Hash() common.Hash
+
+	// Release releases associated resources. Release should always succeed and
+	// can be called multiple times without causing error.
+	Release()
+}
+
+// AccountIterator is an iterator to step over all the accounts in a snapshot,
+// which may or may not be composed of multiple layers.
+type AccountIterator interface {
+	Iterator
+
+	// Account returns the RLP encoded slim account the iterator is currently at.
+	// The method has no error result; a failure of the iterator is reported
+	// through Error.
+	Account() []byte
+}
+
+// StorageIterator is an iterator to step over the specific storage in a snapshot,
+// which may or may not be composed of multiple layers.
+type StorageIterator interface {
+	Iterator
+
+	// Slot returns the storage slot the iterator is currently at. The method
+	// has no error result; a failure of the iterator is reported through Error.
+	Slot() []byte
+}
+
+// TrieKV is one leaf handed to a trie generator: the hashed key and the value
+// stored under it.
+type TrieKV struct {
+	Key   common.Hash
+	Value []byte
+}
+
+// TrieGeneratorFn is the signature of a trie generation routine, which can be
+// implemented by different trie algorithms. It receives leaves on in and
+// publishes the resulting root hash on out.
+type TrieGeneratorFn func(db ethdb.KeyValueWriter, scheme string, owner common.Hash, in chan TrieKV, out chan common.Hash)
+
+// LeafCallbackFn is the callback invoked at the leaves of the trie. It returns
+// the subtrie root for the specified subtrie identifier.
+type LeafCallbackFn func(db ethdb.KeyValueWriter, accountHash, codeHash common.Hash, stat *GenerateStats) (common.Hash, error)
+
+// GenerateStats collects the counters and timestamps that the trie generator
+// gathers for logging purposes. All fields are guarded by lock.
+type GenerateStats struct {
+	head  common.Hash
+	start time.Time
+
+	accounts uint64 // Number of accounts done (including those being crawled)
+	slots    uint64 // Number of storage slots done (including those being crawled)
+
+	slotsStart map[common.Hash]time.Time   // Start time for account slot crawling
+	slotsHead  map[common.Hash]common.Hash // Slot head for accounts being crawled
+
+	lock sync.RWMutex
+}
+
+// NewGenerateStats returns an empty statistics collector whose start time is
+// the moment of the call.
+func NewGenerateStats() *GenerateStats {
+	stats := new(GenerateStats)
+	stats.start = time.Now()
+	stats.slotsStart = make(map[common.Hash]time.Time)
+	stats.slotsHead = make(map[common.Hash]common.Hash)
+	return stats
+}
+
+// ProgressAccounts records that done more accounts were processed and that the
+// account iteration has reached the given account hash.
+func (stat *GenerateStats) ProgressAccounts(account common.Hash, done uint64) {
+	stat.lock.Lock()
+	defer stat.lock.Unlock()
+
+	stat.head = account
+	stat.accounts += done
+}
+
+// FinishAccounts updates the generator stats for the finished account range.
+func (stat *GenerateStats) FinishAccounts(done uint64) {
+	stat.lock.Lock()
+	defer stat.lock.Unlock()
+
+	stat.accounts += done
+}
+
+// ProgressContract updates the generator stats for a specific in-progress contract.
+// It adds done to the slot counter, remembers slot as the latest position for
+// the account and records the crawl start time the first time the account is seen.
+func (stat *GenerateStats) ProgressContract(account common.Hash, slot common.Hash, done uint64) {
+	stat.lock.Lock()
+	defer stat.lock.Unlock()
+
+	stat.slots += done
+	stat.slotsHead[account] = slot
+	if _, ok := stat.slotsStart[account]; !ok {
+		stat.slotsStart[account] = time.Now()
+	}
+}
+
+// FinishContract updates the generator stats for a specific just-finished contract.
+// It adds done to the slot counter and forgets the per-account crawl state.
+func (stat *GenerateStats) FinishContract(account common.Hash, done uint64) {
+	stat.lock.Lock()
+	defer stat.lock.Unlock()
+
+	stat.slots += done
+	delete(stat.slotsHead, account)
+	delete(stat.slotsStart, account)
+}
+
+// Report prints the cumulative progress statistic smartly. An ETA is only
+// included once the account head has advanced far enough to estimate one.
+func (stat *GenerateStats) Report() {
+	stat.lock.RLock()
+	defer stat.lock.RUnlock()
+
+	ctx := []interface{}{
+		"accounts", stat.accounts,
+		"slots", stat.slots,
+		"elapsed", common.PrettyDuration(time.Since(stat.start)),
+	}
+	if stat.accounts > 0 {
+		// If there's progress on the account trie, estimate the time to finish crawling it
+		if done := binary.BigEndian.Uint64(stat.head[:8]) / stat.accounts; done > 0 {
+			var (
+				left = (math.MaxUint64 - binary.BigEndian.Uint64(stat.head[:8])) / stat.accounts
+				eta  = common.CalculateETA(done, left, time.Since(stat.start))
+			)
+			// If there are large contract crawls in progress, estimate their finish time
+			for acc, head := range stat.slotsHead {
+				start := stat.slotsStart[acc]
+				if done := binary.BigEndian.Uint64(head[:8]); done > 0 {
+					left := math.MaxUint64 - binary.BigEndian.Uint64(head[:8])
+
+					// Override the ETA if larger than the largest until now
+					if slotETA := common.CalculateETA(done, left, time.Since(start)); eta < slotETA {
+						eta = slotETA
+					}
+				}
+			}
+			ctx = append(ctx, []interface{}{
+				"eta", common.PrettyDuration(eta),
+			}...)
+		}
+	}
+	log.Info("Iterating state snapshot", ctx...)
+}
+
+// ReportDone prints the last log when the whole generation is finished. The
+// slot counter is omitted when no slot was processed.
+func (stat *GenerateStats) ReportDone() {
+	stat.lock.RLock()
+	defer stat.lock.RUnlock()
+
+	var ctx []interface{}
+	ctx = append(ctx, []interface{}{"accounts", stat.accounts}...)
+	if stat.slots != 0 {
+		ctx = append(ctx, []interface{}{"slots", stat.slots}...)
+	}
+	ctx = append(ctx, []interface{}{"elapsed", common.PrettyDuration(time.Since(stat.start))}...)
+	log.Info("Iterated snapshot", ctx...)
+}
+
+// RunReport periodically prints the progress information. The first report is
+// printed immediately and then every eight seconds, until a value arrives on
+// stop; a true value additionally triggers the final ReportDone log.
+func RunReport(stats *GenerateStats, stop chan bool) {
+	timer := time.NewTimer(0)
+	defer timer.Stop()
+
+	for {
+		select {
+		case <-timer.C:
+			stats.Report()
+			timer.Reset(time.Second * 8)
+		case success := <-stop:
+			if success {
+				stats.ReportDone()
+			}
+			return
+		}
+	}
+}
+
+// GenerateTrieRoot generates the trie hash based on the snapshot iterator.
+// It can be used for generating account trie, storage trie or even the
+// whole state which connects the accounts and the corresponding storages.
+//
+// A zero account hash selects account mode: it must then be an AccountIterator
+// and, if leafCallback is non-nil, the callback is run concurrently (bounded by
+// runtime.NumCPU) for every account and must return the root recorded in that
+// account. Any other account hash selects storage mode, in which it must be a
+// StorageIterator. Every leaf is streamed to generatorFn, which also receives
+// db, scheme and account. Progress is logged when report is set and stats is
+// non-nil.
+//
+// The returned error is the first failure encountered: an account that cannot
+// be decoded or encoded, a failed or mismatching leafCallback, or the error
+// reported by the iterator once iteration has ended.
+func GenerateTrieRoot(db ethdb.KeyValueWriter, scheme string, it Iterator, account common.Hash, generatorFn TrieGeneratorFn, leafCallback LeafCallbackFn, stats *GenerateStats, report bool) (common.Hash, error) {
+	var (
+		in      = make(chan TrieKV)         // chan to pass leaves
+		out     = make(chan common.Hash, 1) // chan to collect result
+		stoplog = make(chan bool, 1)        // 1-size buffer, works when logging is not enabled
+		wg      sync.WaitGroup
+	)
+	// Spin up a go-routine for trie hash re-generation
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		generatorFn(db, scheme, account, in, out)
+	}()
+	// Spin up a go-routine for progress logging
+	if report && stats != nil {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			RunReport(stats, stoplog)
+		}()
+	}
+	// Create a semaphore to assign tasks and collect results through. We'll pre-
+	// fill it with nils, thus using the same channel for both limiting concurrent
+	// processing and gathering results.
+	threads := runtime.NumCPU()
+	results := make(chan error, threads)
+	for i := 0; i < threads; i++ {
+		results <- nil // fill the semaphore
+	}
+	// stop is a helper function to shutdown the background threads
+	// and return the re-generated trie hash. It drains exactly `threads`
+	// entries from results, so every entry taken out of the semaphore must
+	// either be owned by a running sub-task or have been put back.
+	stop := func(fail error) (common.Hash, error) {
+		close(in)
+		result := <-out
+		for i := 0; i < threads; i++ {
+			if err := <-results; err != nil && fail == nil {
+				fail = err
+			}
+		}
+		stoplog <- fail == nil
+
+		wg.Wait()
+		return result, fail
+	}
+	var (
+		logged    = time.Now()
+		processed = uint64(0)
+		leaf      TrieKV
+	)
+	// Start to feed leaves
+	for it.Next() {
+		if account == (common.Hash{}) {
+			var (
+				err      error
+				fullData []byte
+			)
+			if leafCallback == nil {
+				fullData, err = types.FullAccountRLP(it.(AccountIterator).Account())
+				if err != nil {
+					return stop(err)
+				}
+			} else {
+				// Decode the account before taking a semaphore entry. Bailing
+				// out while holding an entry that no sub-task will hand back
+				// would leave stop waiting forever for the missing one.
+				account, err := types.FullAccount(it.(AccountIterator).Account())
+				if err != nil {
+					return stop(err)
+				}
+				// Wait until the semaphore allows us to continue, aborting if
+				// a sub-task failed
+				if err := <-results; err != nil {
+					results <- nil // stop will drain the results, add a noop back for this error we just consumed
+					return stop(err)
+				}
+				// Process the account's subtrie concurrently; the sub-task
+				// returns the semaphore entry together with its outcome.
+				go func(hash common.Hash) {
+					subroot, err := leafCallback(db, hash, common.BytesToHash(account.CodeHash), stats)
+					if err != nil {
+						results <- err
+						return
+					}
+					if account.Root != subroot {
+						results <- fmt.Errorf("invalid subroot(path %x), want %x, have %x", hash, account.Root, subroot)
+						return
+					}
+					results <- nil
+				}(it.Hash())
+				fullData, err = rlp.EncodeToBytes(account)
+				if err != nil {
+					return stop(err)
+				}
+			}
+			leaf = TrieKV{it.Hash(), fullData}
+		} else {
+			leaf = TrieKV{it.Hash(), common.CopyBytes(it.(StorageIterator).Slot())}
+		}
+		in <- leaf
+
+		// Accumulate the generation statistic if it's required.
+		processed++
+		if time.Since(logged) > 3*time.Second && stats != nil {
+			if account == (common.Hash{}) {
+				stats.ProgressAccounts(it.Hash(), processed)
+			} else {
+				stats.ProgressContract(account, it.Hash(), processed)
+			}
+			logged, processed = time.Now(), 0
+		}
+	}
+	// Commit the last part statistic.
+	if processed > 0 && stats != nil {
+		if account == (common.Hash{}) {
+			stats.FinishAccounts(processed)
+		} else {
+			stats.FinishContract(account, processed)
+		}
+	}
+	// Next also returns false when iteration failed, so hand the iterator's
+	// error (nil on a clean run) to stop instead of assuming success.
+	return stop(it.Error())
+}
+
+// StackTrieGenerate is the trie generation function that creates a StackTrie
+// and persists nodes via rawdb.WriteTrieNode. When db is nil no node callback
+// is installed and only the root hash is computed. The function consumes
+// leaves until in is closed and then sends the root hash on out.
+func StackTrieGenerate(db ethdb.KeyValueWriter, scheme string, owner common.Hash, in chan TrieKV, out chan common.Hash) {
+	var onTrieNode trie.OnTrieNode
+	if db != nil {
+		onTrieNode = func(path []byte, hash common.Hash, blob []byte) {
+			rawdb.WriteTrieNode(db, owner, path, hash, blob, scheme)
+		}
+	}
+	t := trie.NewStackTrie(onTrieNode)
+	for leaf := range in {
+		t.Update(leaf.Key[:], leaf.Value)
+	}
+	out <- t.Hash()
+}
diff --git a/triedb/pathdb/iterator.go b/triedb/pathdb/iterator.go
index 8ca8247206..2d333dfa1b 100644
--- a/triedb/pathdb/iterator.go
+++ b/triedb/pathdb/iterator.go
@@ -24,48 +24,15 @@ import (
 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/core/rawdb"
 	"github.com/ethereum/go-ethereum/ethdb"
+	"github.com/ethereum/go-ethereum/triedb/internal"
 )
 
-// Iterator is an iterator to step over all the accounts or the specific
-// storage in a snapshot which may or may not be composed of multiple layers.
-type Iterator interface {
-	// Next steps the iterator forward one element, returning false if exhausted,
-	// or an error if iteration failed for some reason (e.g. root being iterated
-	// becomes stale and garbage collected).
-	Next() bool
-
-	// Error returns any failure that occurred during iteration, which might have
-	// caused a premature iteration exit (e.g. layer stack becoming stale).
- Error() error - - // Hash returns the hash of the account or storage slot the iterator is - // currently at. - Hash() common.Hash - - // Release releases associated resources. Release should always succeed and - // can be called multiple times without causing error. - Release() -} - -// AccountIterator is an iterator to step over all the accounts in a snapshot, -// which may or may not be composed of multiple layers. -type AccountIterator interface { - Iterator - - // Account returns the RLP encoded slim account the iterator is currently at. - // An error will be returned if the iterator becomes invalid - Account() []byte -} - -// StorageIterator is an iterator to step over the specific storage in a snapshot, -// which may or may not be composed of multiple layers. -type StorageIterator interface { - Iterator - - // Slot returns the storage slot the iterator is currently at. An error will - // be returned if the iterator becomes invalid - Slot() []byte -} +// Type aliases for the iterator interfaces defined in triedb/internal. 
+type ( + Iterator = internal.Iterator + AccountIterator = internal.AccountIterator + StorageIterator = internal.StorageIterator +) type ( // loadAccount is the function to retrieve the account from the associated diff --git a/triedb/pathdb/verifier.go b/triedb/pathdb/verifier.go index a69b10f4f3..c53590f2fd 100644 --- a/triedb/pathdb/verifier.go +++ b/triedb/pathdb/verifier.go @@ -17,36 +17,15 @@ package pathdb import ( - "encoding/binary" "errors" "fmt" - "math" - "runtime" - "sync" - "time" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/types" - "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/trie" -) - -// trieKV represents a trie key-value pair -type trieKV struct { - key common.Hash - value []byte -} - -type ( - // trieHasherFn is the interface of trie hasher which can be implemented - // by different trie algorithm. - trieHasherFn func(in chan trieKV, out chan common.Hash) - - // leafCallbackFn is the callback invoked at the leaves of the trie, - // returns the subtrie root with the specified subtrie identifier. - leafCallbackFn func(accountHash, codeHash common.Hash, stat *generateStats) (common.Hash, error) + "github.com/ethereum/go-ethereum/triedb/internal" ) // VerifyState traverses the flat states specified by the given state root and @@ -58,7 +37,7 @@ func (db *Database) VerifyState(root common.Hash) error { } defer acctIt.Release() - got, err := generateTrieRoot(acctIt, common.Hash{}, stackTrieHasher, func(accountHash, codeHash common.Hash, stat *generateStats) (common.Hash, error) { + got, err := internal.GenerateTrieRoot(nil, "", acctIt, common.Hash{}, stackTrieHasher, func(_ ethdb.KeyValueWriter, accountHash, codeHash common.Hash, stat *internal.GenerateStats) (common.Hash, error) { // Migrate the code first, commit the contract code into the tmp db. 
if codeHash != types.EmptyCodeHash { code := rawdb.ReadCode(db.diskdb, codeHash) @@ -73,12 +52,12 @@ func (db *Database) VerifyState(root common.Hash) error { } defer storageIt.Release() - hash, err := generateTrieRoot(storageIt, accountHash, stackTrieHasher, nil, stat, false) + hash, err := internal.GenerateTrieRoot(nil, "", storageIt, accountHash, stackTrieHasher, nil, stat, false) if err != nil { return common.Hash{}, err } return hash, nil - }, newGenerateStats(), true) + }, internal.NewGenerateStats(), true) if err != nil { return err @@ -89,264 +68,10 @@ func (db *Database) VerifyState(root common.Hash) error { return nil } -// generateStats is a collection of statistics gathered by the trie generator -// for logging purposes. -type generateStats struct { - head common.Hash - start time.Time - - accounts uint64 // Number of accounts done (including those being crawled) - slots uint64 // Number of storage slots done (including those being crawled) - - slotsStart map[common.Hash]time.Time // Start time for account slot crawling - slotsHead map[common.Hash]common.Hash // Slot head for accounts being crawled - - lock sync.RWMutex -} - -// newGenerateStats creates a new generator stats. -func newGenerateStats() *generateStats { - return &generateStats{ - slotsStart: make(map[common.Hash]time.Time), - slotsHead: make(map[common.Hash]common.Hash), - start: time.Now(), - } -} - -// progressAccounts updates the generator stats for the account range. -func (stat *generateStats) progressAccounts(account common.Hash, done uint64) { - stat.lock.Lock() - defer stat.lock.Unlock() - - stat.accounts += done - stat.head = account -} - -// finishAccounts updates the generator stats for the finished account range. -func (stat *generateStats) finishAccounts(done uint64) { - stat.lock.Lock() - defer stat.lock.Unlock() - - stat.accounts += done -} - -// progressContract updates the generator stats for a specific in-progress contract. 
-func (stat *generateStats) progressContract(account common.Hash, slot common.Hash, done uint64) { - stat.lock.Lock() - defer stat.lock.Unlock() - - stat.slots += done - stat.slotsHead[account] = slot - if _, ok := stat.slotsStart[account]; !ok { - stat.slotsStart[account] = time.Now() - } -} - -// finishContract updates the generator stats for a specific just-finished contract. -func (stat *generateStats) finishContract(account common.Hash, done uint64) { - stat.lock.Lock() - defer stat.lock.Unlock() - - stat.slots += done - delete(stat.slotsHead, account) - delete(stat.slotsStart, account) -} - -// report prints the cumulative progress statistic smartly. -func (stat *generateStats) report() { - stat.lock.RLock() - defer stat.lock.RUnlock() - - ctx := []interface{}{ - "accounts", stat.accounts, - "slots", stat.slots, - "elapsed", common.PrettyDuration(time.Since(stat.start)), - } - if stat.accounts > 0 { - // If there's progress on the account trie, estimate the time to finish crawling it - if done := binary.BigEndian.Uint64(stat.head[:8]) / stat.accounts; done > 0 { - var ( - left = (math.MaxUint64 - binary.BigEndian.Uint64(stat.head[:8])) / stat.accounts - eta = common.CalculateETA(done, left, time.Since(stat.start)) - ) - // If there are large contract crawls in progress, estimate their finish time - for acc, head := range stat.slotsHead { - start := stat.slotsStart[acc] - if done := binary.BigEndian.Uint64(head[:8]); done > 0 { - left := math.MaxUint64 - binary.BigEndian.Uint64(head[:8]) - - // Override the ETA if larger than the largest until now - if slotETA := common.CalculateETA(done, left, time.Since(start)); eta < slotETA { - eta = slotETA - } - } - } - ctx = append(ctx, []interface{}{ - "eta", common.PrettyDuration(eta), - }...) - } - } - log.Info("Iterating state snapshot", ctx...) -} - -// reportDone prints the last log when the whole generation is finished. 
-func (stat *generateStats) reportDone() { - stat.lock.RLock() - defer stat.lock.RUnlock() - - var ctx []interface{} - ctx = append(ctx, []interface{}{"accounts", stat.accounts}...) - if stat.slots != 0 { - ctx = append(ctx, []interface{}{"slots", stat.slots}...) - } - ctx = append(ctx, []interface{}{"elapsed", common.PrettyDuration(time.Since(stat.start))}...) - log.Info("Iterated snapshot", ctx...) -} - -// runReport periodically prints the progress information. -func runReport(stats *generateStats, stop chan bool) { - timer := time.NewTimer(0) - defer timer.Stop() - - for { - select { - case <-timer.C: - stats.report() - timer.Reset(time.Second * 8) - case success := <-stop: - if success { - stats.reportDone() - } - return - } - } -} - -// generateTrieRoot generates the trie hash based on the snapshot iterator. -// It can be used for generating account trie, storage trie or even the -// whole state which connects the accounts and the corresponding storages. -func generateTrieRoot(it Iterator, account common.Hash, generatorFn trieHasherFn, leafCallback leafCallbackFn, stats *generateStats, report bool) (common.Hash, error) { - var ( - in = make(chan trieKV) // chan to pass leaves - out = make(chan common.Hash, 1) // chan to collect result - stoplog = make(chan bool, 1) // 1-size buffer, works when logging is not enabled - wg sync.WaitGroup - ) - // Spin up a go-routine for trie hash re-generation - wg.Add(1) - go func() { - defer wg.Done() - generatorFn(in, out) - }() - // Spin up a go-routine for progress logging - if report && stats != nil { - wg.Add(1) - go func() { - defer wg.Done() - runReport(stats, stoplog) - }() - } - // Create a semaphore to assign tasks and collect results through. We'll pre- - // fill it with nils, thus using the same channel for both limiting concurrent - // processing and gathering results. 
- threads := runtime.NumCPU() - results := make(chan error, threads) - for i := 0; i < threads; i++ { - results <- nil // fill the semaphore - } - // stop is a helper function to shutdown the background threads - // and return the re-generated trie hash. - stop := func(fail error) (common.Hash, error) { - close(in) - result := <-out - for i := 0; i < threads; i++ { - if err := <-results; err != nil && fail == nil { - fail = err - } - } - stoplog <- fail == nil - - wg.Wait() - return result, fail - } - var ( - logged = time.Now() - processed = uint64(0) - leaf trieKV - ) - // Start to feed leaves - for it.Next() { - if account == (common.Hash{}) { - var ( - err error - fullData []byte - ) - if leafCallback == nil { - fullData, err = types.FullAccountRLP(it.(AccountIterator).Account()) - if err != nil { - return stop(err) - } - } else { - // Wait until the semaphore allows us to continue, aborting if - // a sub-task failed - if err := <-results; err != nil { - results <- nil // stop will drain the results, add a noop back for this error we just consumed - return stop(err) - } - // Fetch the next account and process it concurrently - account, err := types.FullAccount(it.(AccountIterator).Account()) - if err != nil { - return stop(err) - } - go func(hash common.Hash) { - subroot, err := leafCallback(hash, common.BytesToHash(account.CodeHash), stats) - if err != nil { - results <- err - return - } - if account.Root != subroot { - results <- fmt.Errorf("invalid subroot(path %x), want %x, have %x", hash, account.Root, subroot) - return - } - results <- nil - }(it.Hash()) - fullData, err = rlp.EncodeToBytes(account) - if err != nil { - return stop(err) - } - } - leaf = trieKV{it.Hash(), fullData} - } else { - leaf = trieKV{it.Hash(), common.CopyBytes(it.(StorageIterator).Slot())} - } - in <- leaf - - // Accumulate the generation statistic if it's required. 
- processed++ - if time.Since(logged) > 3*time.Second && stats != nil { - if account == (common.Hash{}) { - stats.progressAccounts(it.Hash(), processed) - } else { - stats.progressContract(account, it.Hash(), processed) - } - logged, processed = time.Now(), 0 - } - } - // Commit the last part statistic. - if processed > 0 && stats != nil { - if account == (common.Hash{}) { - stats.finishAccounts(processed) - } else { - stats.finishContract(account, processed) - } - } - return stop(nil) -} - -func stackTrieHasher(in chan trieKV, out chan common.Hash) { +func stackTrieHasher(_ ethdb.KeyValueWriter, _ string, _ common.Hash, in chan internal.TrieKV, out chan common.Hash) { t := trie.NewStackTrie(nil) for leaf := range in { - t.Update(leaf.key[:], leaf.value) + t.Update(leaf.Key[:], leaf.Value) } out <- t.Hash() }