From 6b830ce8fbe96d4fb1520ffc8dd80d08812afd80 Mon Sep 17 00:00:00 2001 From: jonny rhea <5555162+jrhea@users.noreply.github.com> Date: Tue, 31 Mar 2026 11:34:14 -0500 Subject: [PATCH] eth/downloader, eth/protocols/snap: BAL req wiring eth/downloader, eth/protocols/snap: remove healing and genTrie, restructure sync loop for snap/2 eth/protocols/snap: Implement BAL fetching eth/protocols/snap: create functions for bal verification and apply eth/downloader,eth/protocols/snap: implement catch-up on pivot eth/protocols/snap: add tests and fix peer registration for access lists eth/protocols/snap: add pivot movement integration tests core, core/state/snapshot: skip snapshot generation after sync completion eth/protocols/snap: skip new empty accounts in applyAccessList and test --- core/blockchain.go | 6 +- core/state/snapshot/snapshot.go | 22 + eth/downloader/downloader.go | 60 +- eth/downloader/downloader_test.go | 21 +- eth/downloader/statesync.go | 16 +- eth/protocols/snap/bal_apply.go | 120 ++ eth/protocols/snap/bal_apply_test.go | 299 +++++ eth/protocols/snap/gentrie.go | 41 - eth/protocols/snap/handlers.go | 13 + eth/protocols/snap/metrics.go | 7 - eth/protocols/snap/peer.go | 25 +- eth/protocols/snap/sync.go | 1699 ++++++++------------------ eth/protocols/snap/sync_test.go | 1074 +++++++++++++--- 13 files changed, 1932 insertions(+), 1471 deletions(-) create mode 100644 eth/protocols/snap/bal_apply.go create mode 100644 eth/protocols/snap/bal_apply_test.go diff --git a/core/blockchain.go b/core/blockchain.go index 2b49111121..5164f9a1e5 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -1179,10 +1179,10 @@ func (bc *BlockChain) SnapSyncComplete(hash common.Hash) error { if !bc.HasState(root) { return fmt.Errorf("non existent state [%x..]", root[:4]) } - // Destroy any existing state snapshot and regenerate it in the background, - // also resuming the normal maintenance of any previously paused snapshot. + // Set up the snapshot tree from the synced flat state. Snap/2 downloads + // flat state directly as the snapshot. if bc.snaps != nil { - bc.snaps.Rebuild(root) + bc.snaps.RebuildFromSyncedState(root) } // If all checks out, manually set the head block. diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go index f0f6296433..4a69ec3f49 100644 --- a/core/state/snapshot/snapshot.go +++ b/core/state/snapshot/snapshot.go @@ -23,6 +23,7 @@ import ( "fmt" "sync" + "github.com/VictoriaMetrics/fastcache" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/types" @@ -726,6 +727,27 @@ func (t *Tree) Rebuild(root common.Hash) { } } +// RebuildFromSyncedState sets up the snapshot tree to use flat state that was +// already downloaded by snap sync. Unlike Rebuild, it does NOT regenerate the +// snapshot from the trie. +func (t *Tree) RebuildFromSyncedState(root common.Hash) { + t.lock.Lock() + defer t.lock.Unlock() + rawdb.DeleteSnapshotRecoveryNumber(t.diskdb) + rawdb.DeleteSnapshotDisabled(t.diskdb) + rawdb.WriteSnapshotRoot(t.diskdb, root) + journalProgress(t.diskdb, nil, nil) + log.Info("Setting up snapshot from synced state", "root", root) + t.layers = map[common.Hash]snapshot{ + root: &diskLayer{ + diskdb: t.diskdb, + triedb: t.triedb, + cache: fastcache.New(t.config.CacheSize * 1024 * 1024), + root: root, + }, + } +} + // AccountIterator creates a new account iterator for the specified root hash and // seeks to a starting account hash. func (t *Tree) AccountIterator(root common.Hash, seek common.Hash) (AccountIterator, error) { diff --git a/eth/downloader/downloader.go b/eth/downloader/downloader.go index 1de0933842..11da4d0bb6 100644 --- a/eth/downloader/downloader.go +++ b/eth/downloader/downloader.go @@ -276,7 +276,7 @@ func (d *Downloader) Progress() ethereum.SyncProgress { default: log.Error("Unknown downloader mode", "mode", mode) } - progress, pending := d.SnapSyncer.Progress() + progress := d.SnapSyncer.Progress() return ethereum.SyncProgress{ StartingBlock: d.syncStatsChainOrigin, @@ -288,12 +288,6 @@ func (d *Downloader) Progress() ethereum.SyncProgress { SyncedBytecodeBytes: uint64(progress.BytecodeBytes), SyncedStorage: progress.StorageSynced, SyncedStorageBytes: uint64(progress.StorageBytes), - HealedTrienodes: progress.TrienodeHealSynced, - HealedTrienodeBytes: uint64(progress.TrienodeHealBytes), - HealedBytecodes: progress.BytecodeHealSynced, - HealedBytecodeBytes: uint64(progress.BytecodeHealBytes), - HealingTrienodes: pending.TrienodeHeal, - HealingBytecode: pending.BytecodeHeal, } } @@ -873,13 +867,48 @@ func (d *Downloader) importBlockResults(results []*fetchResult) error { return nil } +// checkDeepReorg checks if the old pivot block was reorged by comparing its +// state root against the current canonical chain. If the canonical header at +// the old pivot's block number has a different state root, the syncer's flat +// state is from the old fork and must be wiped. Returns true if a deep reorg +// was detected. +// +// Returns false (no reorg) when the canonical hash or header is missing. This +// avoids false positives from pruned or not-yet-downloaded data. If the chain +// really did shorten past the old pivot, sync.catchUp's from > to guard will +// catch this. +func checkDeepReorg(db ethdb.Database, oldNumber uint64, oldRoot common.Hash) bool { + oldHash := rawdb.ReadCanonicalHash(db, oldNumber) + if oldHash == (common.Hash{}) { + return false + } + oldHeader := rawdb.ReadHeader(db, oldHash, oldNumber) + if oldHeader == nil { + return false + } + return oldHeader.Root != oldRoot +} + +// restartSnapSync cancels the current state sync and starts a new one with the +// given root. Before restarting, it checks for deep reorgs and wipes sync +// progress if the old pivot was reorged. +func (d *Downloader) restartSnapSync(oldSync *stateSync, newRoot common.Hash, newNumber uint64) *stateSync { + if checkDeepReorg(d.stateDB, oldSync.number, oldSync.root) { + log.Warn("Deep reorg detected, restarting snap sync from scratch", + "number", oldSync.number, "oldRoot", oldSync.root) + rawdb.WriteSnapshotSyncStatus(d.stateDB, nil) + } + oldSync.Cancel() + return d.syncState(newRoot, newNumber) +} + // processSnapSyncContent takes fetch results from the queue and writes them to the // database. It also controls the synchronisation of state nodes of the pivot block. func (d *Downloader) processSnapSyncContent() error { // Start syncing state of the reported head block. This should get us most of // the state of the pivot block. d.pivotLock.RLock() - sync := d.syncState(d.pivotHeader.Root) + sync := d.syncState(d.pivotHeader.Root, d.pivotHeader.Number.Uint64()) d.pivotLock.RUnlock() defer func() { @@ -950,9 +979,7 @@ func (d *Downloader) processSnapSyncContent() error { if oldPivot == nil { // no results piling up, we can move the pivot if !d.committed.Load() { // not yet passed the pivot, we can move the pivot if pivot.Root != sync.root { // pivot position changed, we can move the pivot - sync.Cancel() - sync = d.syncState(pivot.Root) - + sync = d.restartSnapSync(sync, pivot.Root, pivot.Number.Uint64()) go closeOnErr(sync) } } @@ -966,9 +993,7 @@ func (d *Downloader) processSnapSyncContent() error { if P != nil { // If new pivot block found, cancel old state retrieval and restart if oldPivot != P { - sync.Cancel() - sync = d.syncState(P.Header.Root) - + sync = d.restartSnapSync(sync, P.Header.Root, P.Header.Number.Uint64()) go closeOnErr(sync) oldPivot = P } @@ -1086,7 +1111,12 @@ func (d *Downloader) DeliverSnapPacket(peer *snap.Peer, packet snap.Packet) erro return d.SnapSyncer.OnByteCodes(peer, packet.ID, packet.Codes) case *snap.TrieNodesPacket: - return d.SnapSyncer.OnTrieNodes(peer, packet.ID, packet.Nodes) + // Snap/2 no longer requests trie nodes. Stale responses from + // snap/1 peers are silently ignored. + return nil + + case *snap.AccessListsPacket: + return d.SnapSyncer.OnAccessLists(peer, packet.ID, packet.AccessLists) default: return fmt.Errorf("unexpected snap packet type: %T", packet) diff --git a/eth/downloader/downloader_test.go b/eth/downloader/downloader_test.go index 6d5d159631..bc0e807a3b 100644 --- a/eth/downloader/downloader_test.go +++ b/eth/downloader/downloader_test.go @@ -373,20 +373,15 @@ func (dlp *downloadTesterPeer) RequestByteCodes(id uint64, hashes []common.Hash, return nil } -// RequestTrieNodes fetches a batch of account or storage trie nodes. -func (dlp *downloadTesterPeer) RequestTrieNodes(id uint64, root common.Hash, count int, paths []snap.TrieNodePathSet, bytes int) error { - encPaths, err := rlp.EncodeToRawList(paths) - if err != nil { - panic(err) +// RequestAccessLists fetches a batch of BALs by block hash. +func (dlp *downloadTesterPeer) RequestAccessLists(id uint64, hashes []common.Hash, bytes int) error { + req := &snap.GetAccessListsPacket{ + ID: id, + Hashes: hashes, + Bytes: uint64(bytes), } - req := &snap.GetTrieNodesPacket{ - ID: id, - Root: root, - Paths: encPaths, - Bytes: uint64(bytes), - } - nodes, _ := snap.ServiceGetTrieNodesQuery(dlp.chain, req) - go dlp.dl.downloader.SnapSyncer.OnTrieNodes(dlp, id, nodes) + als := snap.ServiceGetAccessListsQuery(dlp.chain, req) + go dlp.dl.downloader.SnapSyncer.OnAccessLists(dlp, id, als) return nil } diff --git a/eth/downloader/statesync.go b/eth/downloader/statesync.go index 501af63ed5..873a190af7 100644 --- a/eth/downloader/statesync.go +++ b/eth/downloader/statesync.go @@ -23,10 +23,10 @@ import ( "github.com/ethereum/go-ethereum/log" ) -// syncState starts downloading state with the given root hash. -func (d *Downloader) syncState(root common.Hash) *stateSync { +// syncState starts downloading state with the given root hash and block number. +func (d *Downloader) syncState(root common.Hash, number uint64) *stateSync { // Create the state sync - s := newStateSync(d, root) + s := newStateSync(d, root, number) select { case d.stateSyncStart <- s: // If we tell the statesync to restart with a new root, we also need @@ -77,8 +77,9 @@ func (d *Downloader) runStateSync(s *stateSync) *stateSync { // stateSync schedules requests for downloading a particular state trie defined // by a given state root. type stateSync struct { - d *Downloader // Downloader instance to access and manage current peerset - root common.Hash // State root currently being synced + d *Downloader // Downloader instance to access and manage current peerset + root common.Hash // State root currently being synced + number uint64 // Block number of the pivot started chan struct{} // Started is signalled once the sync loop starts cancel chan struct{} // Channel to signal a termination request @@ -89,10 +90,11 @@ type stateSync struct { // newStateSync creates a new state trie download scheduler. This method does not // yet start the sync. The user needs to call run to initiate. -func newStateSync(d *Downloader, root common.Hash) *stateSync { +func newStateSync(d *Downloader, root common.Hash, number uint64) *stateSync { return &stateSync{ d: d, root: root, + number: number, cancel: make(chan struct{}), done: make(chan struct{}), started: make(chan struct{}), @@ -104,7 +106,7 @@ func newStateSync(d *Downloader, root common.Hash) *stateSync { // finish. func (s *stateSync) run() { close(s.started) - s.err = s.d.SnapSyncer.Sync(s.root, s.cancel) + s.err = s.d.SnapSyncer.Sync(s.root, s.number, s.cancel) close(s.done) } diff --git a/eth/protocols/snap/bal_apply.go b/eth/protocols/snap/bal_apply.go new file mode 100644 index 0000000000..5ec9d21420 --- /dev/null +++ b/eth/protocols/snap/bal_apply.go @@ -0,0 +1,120 @@ +// Copyright 2026 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snap + +import ( + "bytes" + "fmt" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/core/types/bal" + "github.com/ethereum/go-ethereum/crypto" + "github.com/holiman/uint256" +) + +// verifyAccessList checks that the given block access list matches the hash +// committed in the block header. +func verifyAccessList(b *bal.BlockAccessList, header *types.Header) error { + if header.BlockAccessListHash == nil { + return fmt.Errorf("header %d has no access list hash", header.Number) + } + have := b.Hash() + if have != *header.BlockAccessListHash { + return fmt.Errorf("access list hash mismatch for block %d: have %v, want %v", header.Number, have, *header.BlockAccessListHash) + } + return nil +} + +// applyAccessList applies a single block's access list diffs to the flat state +// in the database. For each account, it applies the post-block values (highest +// TxIdx entry) for balance, nonce, code, and storage. The storageRoot field is +// intentionally left stale. It will be recomputed during the trie rebuild. +func (s *Syncer) applyAccessList(b *bal.BlockAccessList) error { + batch := s.db.NewBatch() + + for _, access := range b.Accesses { + addr := common.Address(access.Address) + accountHash := crypto.Keccak256Hash(addr[:]) + + // Read the existing account from flat state (may not exist yet) + var ( + account types.StateAccount + isNew bool + ) + if data := rawdb.ReadAccountSnapshot(s.db, accountHash); len(data) > 0 { + existing, err := types.FullAccount(data) + if err != nil { + return fmt.Errorf("failed to decode account %v: %w", addr, err) + } + account = *existing + } else { + // New account — initialize with defaults + isNew = true + account.Balance = new(uint256.Int) + account.Root = types.EmptyRootHash + account.CodeHash = types.EmptyCodeHash[:] + } + + // Apply balance change (last entry = post-block state) + if n := len(access.BalanceChanges); n > 0 { + raw := access.BalanceChanges[n-1].Balance + account.Balance = new(uint256.Int).SetBytes(raw[:]) + } + + // Apply nonce change (last entry = post-block state) + if n := len(access.NonceChanges); n > 0 { + account.Nonce = access.NonceChanges[n-1].Nonce + } + + // Apply code change (last entry = post-block state) + if n := len(access.CodeChanges); n > 0 { + code := access.CodeChanges[n-1].Code + if len(code) > 0 { + codeHash := crypto.Keccak256(code) + rawdb.WriteCode(batch, common.BytesToHash(codeHash), code) + account.CodeHash = codeHash + } else { + account.CodeHash = types.EmptyCodeHash[:] + } + } + + // Apply storage writes (last entry per slot = post-block state) + for _, slotWrites := range access.StorageWrites { + if n := len(slotWrites.Accesses); n > 0 { + value := slotWrites.Accesses[n-1].ValueAfter + storageHash := crypto.Keccak256Hash(slotWrites.Slot[:]) + rawdb.WriteStorageSnapshot(batch, accountHash, storageHash, value[:]) + } + } + + // Don't create empty accounts in flat state (EIP-161). + // This handles the case where an account is created and + // self-destructed in the same transaction. The BAL will + // include it with a balance change to zero, but the account + // should not exist in state. + if isNew && account.Balance.IsZero() && account.Nonce == 0 && + bytes.Equal(account.CodeHash, types.EmptyCodeHash[:]) { + continue + } + + // Write the updated account (storageRoot intentionally left stale) + rawdb.WriteAccountSnapshot(batch, accountHash, types.SlimAccountRLP(account)) + } + return batch.Write() +} diff --git a/eth/protocols/snap/bal_apply_test.go b/eth/protocols/snap/bal_apply_test.go new file mode 100644 index 0000000000..acb6d35a14 --- /dev/null +++ b/eth/protocols/snap/bal_apply_test.go @@ -0,0 +1,299 @@ +// Copyright 2026 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snap + +import ( + "bytes" + "math/big" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/core/types/bal" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/rlp" + "github.com/holiman/uint256" +) + +// buildTestBAL constructs a BlockAccessList from a ConstructionBlockAccessList +// by RLP round-tripping (construction types use unexported encoding types). +func buildTestBAL(t *testing.T, cb *bal.ConstructionBlockAccessList) *bal.BlockAccessList { + t.Helper() + var buf bytes.Buffer + if err := cb.EncodeRLP(&buf); err != nil { + t.Fatalf("failed to encode BAL: %v", err) + } + var b bal.BlockAccessList + if err := rlp.DecodeBytes(buf.Bytes(), &b); err != nil { + t.Fatalf("failed to decode BAL: %v", err) + } + return &b +} + +// TestAccessListVerification checks that verifyAccessList accepts valid BALs +// and rejects tampered ones. +func TestAccessListVerification(t *testing.T) { + t.Parallel() + + cb := bal.NewConstructionBlockAccessList() + addr := common.HexToAddress("0x01") + cb.BalanceChange(0, addr, uint256.NewInt(100)) + + b := buildTestBAL(t, &cb) + correctHash := b.Hash() + + // Valid: hash matches header + header := &types.Header{ + Number: big.NewInt(1), + BlockAccessListHash: &correctHash, + } + if err := verifyAccessList(b, header); err != nil { + t.Fatalf("valid access list rejected: %v", err) + } + // Invalid: wrong hash in header + wrongHash := common.HexToHash("0xdead") + badHeader := &types.Header{ + Number: big.NewInt(1), + BlockAccessListHash: &wrongHash, + } + if err := verifyAccessList(b, badHeader); err == nil { + t.Fatal("tampered access list accepted") + } + // Invalid: no hash in header + noHashHeader := &types.Header{ + Number: big.NewInt(1), + } + if err := verifyAccessList(b, noHashHeader); err == nil { + t.Fatal("header without access list hash accepted") + } +} + +// TestAccessListApplication verifies that applyAccessList correctly updates +// flat state (balance, nonce, code, storage) and leaves storageRoot stale. +func TestAccessListApplication(t *testing.T) { + t.Parallel() + db := rawdb.NewMemoryDatabase() + syncer := NewSyncer(db, rawdb.HashScheme) + addr := common.HexToAddress("0x01") + accountHash := crypto.Keccak256Hash(addr[:]) + + // Write an existing account to flat state + original := types.StateAccount{ + Nonce: 5, + Balance: uint256.NewInt(1000), + Root: common.HexToHash("0xbeef"), // intentionally non-empty + CodeHash: types.EmptyCodeHash[:], + } + rawdb.WriteAccountSnapshot(db, accountHash, types.SlimAccountRLP(original)) + + // Write an existing storage slot. The BAL uses raw slot keys, but the + // snapshot layer stores slots under keccak256(slot). + rawSlot := common.HexToHash("0xaa") + slotHash := crypto.Keccak256Hash(rawSlot[:]) + rawdb.WriteStorageSnapshot(db, accountHash, slotHash, common.HexToHash("0x01").Bytes()) + + // Build a BAL that changes balance, nonce, code, and storage + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, addr, uint256.NewInt(2000)) + cb.NonceChange(addr, 0, 6) + cb.CodeChange(addr, 0, []byte{0x60, 0x00}) // PUSH1 0x00 + cb.StorageWrite(0, addr, rawSlot, common.HexToHash("0x02")) + b := buildTestBAL(t, &cb) + if err := syncer.applyAccessList(b); err != nil { + t.Fatalf("applyAccessList failed: %v", err) + } + + // Verify account fields updated + data := rawdb.ReadAccountSnapshot(db, accountHash) + if len(data) == 0 { + t.Fatal("account snapshot missing after apply") + } + updated, err := types.FullAccount(data) + if err != nil { + t.Fatalf("failed to decode updated account: %v", err) + } + if updated.Balance.Cmp(uint256.NewInt(2000)) != 0 { + t.Errorf("balance wrong: got %v, want 2000", updated.Balance) + } + if updated.Nonce != 6 { + t.Errorf("nonce wrong: got %d, want 6", updated.Nonce) + } + wantCodeHash := crypto.Keccak256([]byte{0x60, 0x00}) + if !bytes.Equal(updated.CodeHash, wantCodeHash) { + t.Errorf("code hash wrong: got %x, want %x", updated.CodeHash, wantCodeHash) + } + + // Verify code was written + if code := rawdb.ReadCode(db, common.BytesToHash(wantCodeHash)); !bytes.Equal(code, []byte{0x60, 0x00}) { + t.Errorf("code wrong: got %x, want 6000", code) + } + + // Verify storage updated + storageVal := rawdb.ReadStorageSnapshot(db, accountHash, slotHash) + if !bytes.Equal(storageVal, common.HexToHash("0x02").Bytes()) { + t.Errorf("storage wrong: got %x, want %x", storageVal, common.HexToHash("0x02").Bytes()) + } + + // Verify storageRoot left stale (unchanged from original) + if updated.Root != original.Root { + t.Errorf("storageRoot should be stale: got %v, want %v", updated.Root, original.Root) + } +} + +// TestAccessListApplicationMultiTx verifies that when an account has multiple +// changes at different transaction indices, only the highest index (post-block +// state) is applied. +func TestAccessListApplicationMultiTx(t *testing.T) { + t.Parallel() + db := rawdb.NewMemoryDatabase() + syncer := NewSyncer(db, rawdb.HashScheme) + addr := common.HexToAddress("0x02") + accountHash := crypto.Keccak256Hash(addr[:]) + + // Write initial account + original := types.StateAccount{ + Nonce: 0, + Balance: uint256.NewInt(100), + Root: types.EmptyRootHash, + CodeHash: types.EmptyCodeHash[:], + } + rawdb.WriteAccountSnapshot(db, accountHash, types.SlimAccountRLP(original)) + + // Build BAL with multiple balance/nonce changes at different tx indices + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, addr, uint256.NewInt(200)) // tx 0 + cb.BalanceChange(3, addr, uint256.NewInt(500)) // tx 3 + cb.BalanceChange(7, addr, uint256.NewInt(9999)) // tx 7 (final) + cb.NonceChange(addr, 0, 1) // tx 0 + cb.NonceChange(addr, 3, 2) // tx 3 + cb.NonceChange(addr, 7, 3) // tx 7 (final) + b := buildTestBAL(t, &cb) + if err := syncer.applyAccessList(b); err != nil { + t.Fatalf("applyAccessList failed: %v", err) + } + data := rawdb.ReadAccountSnapshot(db, accountHash) + updated, err := types.FullAccount(data) + if err != nil { + t.Fatalf("failed to decode updated account: %v", err) + } + + // Only the highest tx index values should be applied + if updated.Balance.Cmp(uint256.NewInt(9999)) != 0 { + t.Errorf("balance wrong: got %v, want 9999", updated.Balance) + } + if updated.Nonce != 3 { + t.Errorf("nonce wrong: got %d, want 3", updated.Nonce) + } +} + +// TestAccessListApplicationNewAccount verifies that applyAccessList creates +// new accounts that don't exist in the DB yet. +func TestAccessListApplicationNewAccount(t *testing.T) { + t.Parallel() + + db := rawdb.NewMemoryDatabase() + syncer := NewSyncer(db, rawdb.HashScheme) + + addr := common.HexToAddress("0x03") + accountHash := crypto.Keccak256Hash(addr[:]) + + // Verify account doesn't exist + if data := rawdb.ReadAccountSnapshot(db, accountHash); len(data) > 0 { + t.Fatal("account should not exist yet") + } + + // Build BAL for a new account. BAL uses raw slot keys. + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, addr, uint256.NewInt(42)) + cb.NonceChange(addr, 0, 1) + rawSlot := common.HexToHash("0xbb") + cb.StorageWrite(0, addr, rawSlot, common.HexToHash("0xff")) + b := buildTestBAL(t, &cb) + if err := syncer.applyAccessList(b); err != nil { + t.Fatalf("applyAccessList failed: %v", err) + } + + // Verify account was created + data := rawdb.ReadAccountSnapshot(db, accountHash) + if len(data) == 0 { + t.Fatal("account should exist after apply") + } + account, err := types.FullAccount(data) + if err != nil { + t.Fatalf("failed to decode new account: %v", err) + } + if account.Balance.Cmp(uint256.NewInt(42)) != 0 { + t.Errorf("balance wrong: got %v, want 42", account.Balance) + } + if account.Nonce != 1 { + t.Errorf("nonce wrong: got %d, want 1", account.Nonce) + } + if account.Root != types.EmptyRootHash { + t.Errorf("root should be empty for new account: got %v", account.Root) + } + + // Verify storage was written under keccak256(rawSlot) + slotHash := crypto.Keccak256Hash(rawSlot[:]) + storageVal := rawdb.ReadStorageSnapshot(db, accountHash, slotHash) + if !bytes.Equal(storageVal, common.HexToHash("0xff").Bytes()) { + t.Errorf("storage wrong: got %x, want %x", storageVal, common.HexToHash("0xff").Bytes()) + } +} + +// TestAccessListApplicationSameTxCreateDestroy tests the edge case where an +// account is created and self-destructed in the same transaction during the +// pivot gap. Per EIP-7928, such accounts appear in the BAL with a balance +// change to zero but no nonce or code changes. Since the account didn't exist +// at the old pivot and doesn't exist at the new pivot (destroyed), +// applyAccessList should not leave a zero-balance account in the snapshot. +// Per EIP-161, empty accounts (zero balance, zero nonce, no code) must not exist +// in state. +func TestAccessListApplicationSameTxCreateDestroy(t *testing.T) { + t.Parallel() + db := rawdb.NewMemoryDatabase() + syncer := NewSyncer(db, rawdb.HashScheme) + addr := common.HexToAddress("0x04") + accountHash := crypto.Keccak256Hash(addr[:]) + + // Verify account doesn't exist before apply + if data := rawdb.ReadAccountSnapshot(db, accountHash); len(data) > 0 { + t.Fatal("account should not exist yet") + } + + // Build a BAL mimicking same-tx create+destroy: the account appears + // with a balance change to zero and nothing else. + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, addr, uint256.NewInt(0)) + b := buildTestBAL(t, &cb) + if err := syncer.applyAccessList(b); err != nil { + t.Fatalf("applyAccessList failed: %v", err) + } + + // Check if applyAccessList created an account. + data := rawdb.ReadAccountSnapshot(db, accountHash) + if len(data) > 0 { + // Account was created + account, err := types.FullAccount(data) + if err != nil { + t.Fatalf("failed to decode account: %v", err) + } + t.Errorf("account created for same-tx create+destroy: "+ + "balance=%v, nonce=%d, codeHash=%x, root=%v", + account.Balance, account.Nonce, account.CodeHash, account.Root) + } +} diff --git a/eth/protocols/snap/gentrie.go b/eth/protocols/snap/gentrie.go index 5126d26777..fd8b5e1f0d 100644 --- a/eth/protocols/snap/gentrie.go +++ b/eth/protocols/snap/gentrie.go @@ -25,20 +25,6 @@ import ( "github.com/ethereum/go-ethereum/trie" ) -// genTrie interface is used by the snap syncer to generate merkle tree nodes -// based on a received batch of states. -type genTrie interface { - // update inserts the state item into generator trie. - update(key, value []byte) error - - // delete removes the state item from the generator trie. - delete(key []byte) error - - // commit flushes the right boundary nodes if complete flag is true. This - // function must be called before flushing the associated database batch. - commit(complete bool) common.Hash -} - // pathTrie is a wrapper over the stackTrie, incorporating numerous additional // logics to handle the semi-completed trie and potential leftover dangling // nodes in the database. It is utilized for constructing the merkle tree nodes @@ -292,30 +278,3 @@ func (t *pathTrie) commit(complete bool) common.Hash { } // hashTrie is a wrapper over the stackTrie for implementing genTrie interface. -type hashTrie struct { - tr *trie.StackTrie -} - -// newHashTrie initializes the hash trie. -func newHashTrie(batch ethdb.Batch) *hashTrie { - return &hashTrie{tr: trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { - rawdb.WriteLegacyTrieNode(batch, hash, blob) - })} -} - -// update implements genTrie interface, inserting a (key, value) pair into -// the stack trie. -func (t *hashTrie) update(key, value []byte) error { - return t.tr.Update(key, value) -} - -// delete implements genTrie interface, ignoring the state item for deleting. -func (t *hashTrie) delete(key []byte) error { return nil } - -// commit implements genTrie interface, committing the nodes on right boundary. -func (t *hashTrie) commit(complete bool) common.Hash { - if !complete { - return common.Hash{} // the hash is meaningless for incomplete commit - } - return t.tr.Hash() // return hash only if it's claimed as complete -} diff --git a/eth/protocols/snap/handlers.go b/eth/protocols/snap/handlers.go index 5a5733bdb4..22822967cb 100644 --- a/eth/protocols/snap/handlers.go +++ b/eth/protocols/snap/handlers.go @@ -598,3 +598,16 @@ func ServiceGetAccessListsQuery(chain *core.BlockChain, req *GetAccessListsPacke } return response } + +// nolint:unused +func handleAccessLists(backend Backend, msg Decoder, peer *Peer) error { + res := new(AccessListsPacket) + if err := msg.Decode(res); err != nil { + return fmt.Errorf("%w: message %v: %v", errDecode, msg, err) + } + tresp := tracker.Response{ID: res.ID, MsgCode: AccessListsMsg, Size: res.AccessLists.Len()} + if err := peer.tracker.Fulfil(tresp); err != nil { + return fmt.Errorf("BALs: %w", err) + } + return backend.Handle(peer, res) +} diff --git a/eth/protocols/snap/metrics.go b/eth/protocols/snap/metrics.go index 6319a9b75d..9b15f4d823 100644 --- a/eth/protocols/snap/metrics.go +++ b/eth/protocols/snap/metrics.go @@ -58,15 +58,8 @@ var ( // to retrieved concurrently. largeStorageGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/large", nil) - // skipStorageHealingGauge is the metric to track how many storages are retrieved - // in multiple requests but healing is not necessary. - skipStorageHealingGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/noheal", nil) - // largeStorageDiscardGauge is the metric to track how many chunked storages are // discarded during the snap sync. largeStorageDiscardGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/chunk/discard", nil) largeStorageResumedGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/chunk/resume", nil) - - stateSyncTimeGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/time/statesync", nil) - stateHealTimeGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/time/stateheal", nil) ) diff --git a/eth/protocols/snap/peer.go b/eth/protocols/snap/peer.go index 0b96de4158..484bc66a2a 100644 --- a/eth/protocols/snap/peer.go +++ b/eth/protocols/snap/peer.go @@ -23,7 +23,6 @@ import ( "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/p2p" "github.com/ethereum/go-ethereum/p2p/tracker" - "github.com/ethereum/go-ethereum/rlp" ) // Peer is a collection of relevant information we have about a `snap` peer. @@ -154,25 +153,21 @@ func (p *Peer) RequestByteCodes(id uint64, hashes []common.Hash, bytes int) erro }) } -// RequestTrieNodes fetches a batch of account or storage trie nodes rooted in -// a specific state trie. The `count` is the total count of paths being requested. -func (p *Peer) RequestTrieNodes(id uint64, root common.Hash, count int, paths []TrieNodePathSet, bytes int) error { - p.logger.Trace("Fetching set of trie nodes", "reqid", id, "root", root, "pathsets", len(paths), "bytes", common.StorageSize(bytes)) - +// RequestAccessLists fetches a batch of BALs by block hash. +func (p *Peer) RequestAccessLists(id uint64, hashes []common.Hash, bytes int) error { + p.logger.Trace("Fetching set of BALs", "reqid", id, "hashes", len(hashes), "bytes", common.StorageSize(bytes)) err := p.tracker.Track(tracker.Request{ - ReqCode: GetTrieNodesMsg, - RespCode: TrieNodesMsg, + ReqCode: GetAccessListsMsg, + RespCode: AccessListsMsg, ID: id, - Size: count, // TrieNodes is limited by number of items. + Size: len(hashes), }) if err != nil { return err } - encPaths, _ := rlp.EncodeToRawList(paths) - return p2p.Send(p.rw, GetTrieNodesMsg, &GetTrieNodesPacket{ - ID: id, - Root: root, - Paths: encPaths, - Bytes: uint64(bytes), + return p2p.Send(p.rw, GetAccessListsMsg, &GetAccessListsPacket{ + ID: id, + Hashes: hashes, + Bytes: uint64(bytes), }) } diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 841bfb446e..5537c441e8 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -21,19 +21,17 @@ import ( "encoding/json" "errors" "fmt" - gomath "math" "math/big" "math/rand" "sort" "sync" - "sync/atomic" "time" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common/math" "github.com/ethereum/go-ethereum/core/rawdb" - "github.com/ethereum/go-ethereum/core/state" "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/core/types/bal" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/event" @@ -42,6 +40,7 @@ import ( "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" "github.com/ethereum/go-ethereum/trie/trienode" + "github.com/ethereum/go-ethereum/triedb" ) const ( @@ -65,37 +64,15 @@ const ( // come close to that, requesting 4x should be a good approximation. maxCodeRequestCount = maxRequestSize / (24 * 1024) * 4 - // maxTrieRequestCount is the maximum number of trie node blobs to request in - // a single query. If this number is too low, we're not filling responses fully - // and waste round trip times. If it's too high, we're capping responses and - // waste bandwidth. - maxTrieRequestCount = maxRequestSize / 512 - - // trienodeHealRateMeasurementImpact is the impact a single measurement has on - // the local node's trienode processing capacity. A value closer to 0 reacts - // slower to sudden changes, but it is also more stable against temporary hiccups. - trienodeHealRateMeasurementImpact = 0.005 - - // minTrienodeHealThrottle is the minimum divisor for throttling trie node - // heal requests to avoid overloading the local node and excessively expanding - // the state trie breadth wise. - minTrienodeHealThrottle = 1 - - // maxTrienodeHealThrottle is the maximum divisor for throttling trie node - // heal requests to avoid overloading the local node and exessively expanding - // the state trie bedth wise. - maxTrienodeHealThrottle = maxTrieRequestCount - - // trienodeHealThrottleIncrease is the multiplier for the throttle when the - // rate of arriving data is higher than the rate of processing it. - trienodeHealThrottleIncrease = 1.33 - - // trienodeHealThrottleDecrease is the divisor for the throttle when the - // rate of arriving data is lower than the rate of processing it. - trienodeHealThrottleDecrease = 1.25 - - // batchSizeThreshold is the maximum size allowed for gentrie batch. - batchSizeThreshold = 8 * 1024 * 1024 + // maxAccessListRequestCount is the maximum number of block access lists to + // request in a single query. BALs average ~72 KiB compressed (per EIP-7928), + // and EIP-8189 recommends a 2 MiB response soft limit, so we target ~28 + // blocks per request to avoid server-side truncation. + // + // NOTE: If the gas limit is raised significantly, this number may need to be adjusted + // to avoid server-side truncation and re-requesting. It is currently based on + // the assumption that the gas limit is 60M. + maxAccessListRequestCount = 28 ) var ( @@ -228,71 +205,21 @@ type storageResponse struct { cont bool // Whether the last storage range has a continuation } -// trienodeHealRequest tracks a pending state trie request to ensure responses -// are to actual requests and to validate any security constraints. -// -// Concurrency note: trie node requests and responses are handled concurrently from -// the main runloop to allow Keccak256 hash verifications on the peer's thread and -// to drop on invalid response. The request struct must contain all the data to -// construct the response without accessing runloop internals (i.e. task). That -// is only included to allow the runloop to match a response to the task being -// synced without having yet another set of maps. -type trienodeHealRequest struct { - peer string // Peer to which this request is assigned - id uint64 // Request ID of this request - time time.Time // Timestamp when the request was sent - - deliver chan *trienodeHealResponse // Channel to deliver successful response on - revert chan *trienodeHealRequest // Channel to deliver request failure on - cancel chan struct{} // Channel to track sync cancellation - timeout *time.Timer // Timer to track delivery timeout - stale chan struct{} // Channel to signal the request was dropped - - paths []string // Trie node paths for identifying trie node - hashes []common.Hash // Trie node hashes to validate responses - - task *healTask // Task which this request is filling (only access fields through the runloop!!) +type accessListRequest struct { + peer string // Peer to which this request is assigned + id uint64 // Request ID of this request + hashes []common.Hash // Block hashes corresponding to requested BALs + time time.Time // Timestamp when the request was sent + timeout *time.Timer // Timer to track the delivery timeout + deliver chan *accessListResponse // Channel to deliver successful response on + revert chan *accessListRequest // Channel to deliver request failure on + cancel chan struct{} // Channel to track sync cancellation + stale chan struct{} // Channel to signal the request was dropped } -// trienodeHealResponse is an already verified remote response to a trie node request. -type trienodeHealResponse struct { - task *healTask // Task which this request is filling - - paths []string // Paths of the trie nodes - hashes []common.Hash // Hashes of the trie nodes to avoid double hashing - nodes [][]byte // Actual trie nodes to store into the database (nil = missing) -} - -// bytecodeHealRequest tracks a pending bytecode request to ensure responses are to -// actual requests and to validate any security constraints. -// -// Concurrency note: bytecode requests and responses are handled concurrently from -// the main runloop to allow Keccak256 hash verifications on the peer's thread and -// to drop on invalid response. The request struct must contain all the data to -// construct the response without accessing runloop internals (i.e. task). That -// is only included to allow the runloop to match a response to the task being -// synced without having yet another set of maps. -type bytecodeHealRequest struct { - peer string // Peer to which this request is assigned - id uint64 // Request ID of this request - time time.Time // Timestamp when the request was sent - - deliver chan *bytecodeHealResponse // Channel to deliver successful response on - revert chan *bytecodeHealRequest // Channel to deliver request failure on - cancel chan struct{} // Channel to track sync cancellation - timeout *time.Timer // Timer to track delivery timeout - stale chan struct{} // Channel to signal the request was dropped - - hashes []common.Hash // Bytecode hashes to validate responses - task *healTask // Task which this request is filling (only access fields through the runloop!!) -} - -// bytecodeHealResponse is an already verified remote response to a bytecode request. -type bytecodeHealResponse struct { - task *healTask // Task which this request is filling - - hashes []common.Hash // Hashes of the bytecode to avoid double hashing - codes [][]byte // Actual bytecodes to store into the database (nil = missing) +type accessListResponse struct { + req *accessListRequest + accessLists []rlp.RawValue } // accountTask represents the sync task for a chunk of the account snapshot. @@ -323,9 +250,6 @@ type accountTask struct { stateTasks map[common.Hash]common.Hash // Account hashes->roots that need full state retrieval stateCompleted map[common.Hash]struct{} // Account hashes whose storage have been completed - genBatch ethdb.Batch // Batch used by the node generator - genTrie genTrie // Node generator from storage slots - done bool // Flag whether the task can be removed } @@ -361,25 +285,16 @@ type storageTask struct { root common.Hash // Storage root hash for this instance req *storageRequest // Pending request to fill this task - genBatch ethdb.Batch // Batch used by the node generator - genTrie genTrie // Node generator from storage slots - done bool // Flag whether the task can be removed } -// healTask represents the sync task for healing the snap-synced chunk boundaries. -type healTask struct { - scheduler *trie.Sync // State trie sync scheduler defining the tasks - - trieTasks map[string]common.Hash // Set of trie node tasks currently queued for retrieval, indexed by node path - codeTasks map[common.Hash]struct{} // Set of byte code tasks currently queued for retrieval, indexed by code hash -} - // SyncProgress is a database entry to allow suspending and resuming a snapshot state // sync. Opposed to full and fast sync, there is no way to restart a suspended // snap sync without prior knowledge of the suspension point. type SyncProgress struct { - Tasks []*accountTask // The suspended account tasks (contract tasks within) + Root common.Hash // State root being synced (for pivot move detection) + BlockNumber uint64 // Block number of the pivot + Tasks []*accountTask // The suspended account tasks (contract tasks within) // Status report during syncing phase AccountSynced uint64 // Number of accounts downloaded @@ -389,18 +304,6 @@ type SyncProgress struct { StorageSynced uint64 // Number of storage slots downloaded StorageBytes common.StorageSize // Number of storage trie bytes persisted to disk - // Status report during healing phase - TrienodeHealSynced uint64 // Number of state trie nodes downloaded - TrienodeHealBytes common.StorageSize // Number of state trie bytes persisted to disk - BytecodeHealSynced uint64 // Number of bytecodes downloaded - BytecodeHealBytes common.StorageSize // Number of bytecodes persisted to disk -} - -// SyncPending is analogous to SyncProgress, but it's used to report on pending -// ephemeral sync progress that doesn't get persisted into the database. -type SyncPending struct { - TrienodeHeal uint64 // Number of state trie nodes pending - BytecodeHeal uint64 // Number of bytecodes pending } // SyncPeer abstracts out the methods required for a peer to be synced against @@ -422,18 +325,17 @@ type SyncPeer interface { // RequestByteCodes fetches a batch of bytecodes by hash. RequestByteCodes(id uint64, hashes []common.Hash, bytes int) error - // RequestTrieNodes fetches a batch of account or storage trie nodes rooted in - // a specific state trie. - RequestTrieNodes(id uint64, root common.Hash, count int, paths []TrieNodePathSet, bytes int) error + // RequestAccessLists fetches a batch of BALs by block hash. + RequestAccessLists(id uint64, hashes []common.Hash, bytes int) error // Log retrieves the peer's own contextual logger. Log() log.Logger } -// Syncer is an Ethereum account and storage trie syncer based on snapshots and -// the snap protocol. It's purpose is to download all the accounts and storage -// slots from remote peers and reassemble chunks of the state trie, on top of -// which a state sync can be run to fix any gaps / overlaps. +// Syncer is an Ethereum account and storage trie syncer based on the snap +// protocol. It downloads all accounts, storage slots, and bytecodes from +// remote peers as flat state, applies BAL diffs on pivot moves, +// and triggers a final trie rebuild once flat state is consistent. // // Every network request has a variety of failure events: // - The peer disconnects after task assignment, failing to send the request @@ -442,14 +344,15 @@ type SyncPeer interface { // - The peer delivers a stale response after a previous timeout // - The peer delivers a refusal to serve the requested state type Syncer struct { - db ethdb.KeyValueStore // Database to store the trie nodes into (and dedup) - scheme string // Node scheme used in node database + db ethdb.Database // Database to store the trie nodes into (and dedup) + scheme string // Node scheme used in node database - root common.Hash // Current state trie root being synced - tasks []*accountTask // Current account task set being synced - snapped bool // Flag to signal that snap phase is done - healer *healTask // Current state healing task being executed - update chan struct{} // Notification channel for possible sync progression + root common.Hash // Current state trie root being synced + number uint64 // Block number of the current pivot + previousRoot common.Hash // Root from previous sync run (for pivot move detection) + previousNumber uint64 // Block number of the previous pivot + tasks []*accountTask // Current account task set being synced + update chan struct{} // Notification channel for possible sync progression peers map[string]SyncPeer // Currently active peers to download from peerJoin *event.Feed // Event feed to react to peers joining @@ -457,14 +360,16 @@ type Syncer struct { rates *msgrate.Trackers // Message throughput rates for peers // Request tracking during syncing phase - statelessPeers map[string]struct{} // Peers that failed to deliver state data - accountIdlers map[string]struct{} // Peers that aren't serving account requests - bytecodeIdlers map[string]struct{} // Peers that aren't serving bytecode requests - storageIdlers map[string]struct{} // Peers that aren't serving storage requests + statelessPeers map[string]struct{} // Peers that failed to deliver state data + accountIdlers map[string]struct{} // Peers that aren't serving account requests + bytecodeIdlers map[string]struct{} // Peers that aren't serving bytecode requests + storageIdlers map[string]struct{} // Peers that aren't serving storage requests + accessListIdlers map[string]struct{} // Peers that aren't serving access list requests - accountReqs map[uint64]*accountRequest // Account requests currently running - bytecodeReqs map[uint64]*bytecodeRequest // Bytecode requests currently running - storageReqs map[uint64]*storageRequest // Storage requests currently running + accountReqs map[uint64]*accountRequest // Account requests currently running + bytecodeReqs map[uint64]*bytecodeRequest // Bytecode requests currently running + storageReqs map[uint64]*storageRequest // Storage requests currently running + accessListReqs map[uint64]*accessListRequest // Access list requests currently running accountSynced uint64 // Number of accounts downloaded accountBytes common.StorageSize // Number of account trie bytes persisted to disk @@ -475,37 +380,8 @@ type Syncer struct { extProgress *SyncProgress // progress that can be exposed to external caller. - // Request tracking during healing phase - trienodeHealIdlers map[string]struct{} // Peers that aren't serving trie node requests - bytecodeHealIdlers map[string]struct{} // Peers that aren't serving bytecode requests - - trienodeHealReqs map[uint64]*trienodeHealRequest // Trie node requests currently running - bytecodeHealReqs map[uint64]*bytecodeHealRequest // Bytecode requests currently running - - trienodeHealRate float64 // Average heal rate for processing trie node data - trienodeHealPend atomic.Uint64 // Number of trie nodes currently pending for processing - trienodeHealThrottle float64 // Divisor for throttling the amount of trienode heal data requested - trienodeHealThrottled time.Time // Timestamp the last time the throttle was updated - - trienodeHealSynced uint64 // Number of state trie nodes downloaded - trienodeHealBytes common.StorageSize // Number of state trie bytes persisted to disk - trienodeHealDups uint64 // Number of state trie nodes already processed - trienodeHealNops uint64 // Number of state trie nodes not requested - bytecodeHealSynced uint64 // Number of bytecodes downloaded - bytecodeHealBytes common.StorageSize // Number of bytecodes persisted to disk - bytecodeHealDups uint64 // Number of bytecodes already processed - bytecodeHealNops uint64 // Number of bytecodes not requested - - stateWriter ethdb.Batch // Shared batch writer used for persisting raw states - accountHealed uint64 // Number of accounts downloaded during the healing stage - accountHealedBytes common.StorageSize // Number of raw account bytes persisted to disk during the healing stage - storageHealed uint64 // Number of storage slots downloaded during the healing stage - storageHealedBytes common.StorageSize // Number of raw storage bytes persisted to disk during the healing stage - - startTime time.Time // Time instance when snapshot sync started - healStartTime time.Time // Time instance when the state healing started - syncTimeOnce sync.Once // Ensure that the state sync time is uploaded only once - logTime time.Time // Time instance when status was last reported + startTime time.Time // Time instance when snapshot sync started + logTime time.Time // Time instance when status was last reported pend sync.WaitGroup // Tracks network request goroutines for graceful shutdown lock sync.RWMutex // Protects fields that can change outside of sync (peers, reqs, root) @@ -513,7 +389,7 @@ type Syncer struct { // NewSyncer creates a new snapshot syncer to download the Ethereum state over the // snap protocol. -func NewSyncer(db ethdb.KeyValueStore, scheme string) *Syncer { +func NewSyncer(db ethdb.Database, scheme string) *Syncer { return &Syncer{ db: db, scheme: scheme, @@ -524,21 +400,16 @@ func NewSyncer(db ethdb.KeyValueStore, scheme string) *Syncer { rates: msgrate.NewTrackers(log.New("proto", "snap")), update: make(chan struct{}, 1), - accountIdlers: make(map[string]struct{}), - storageIdlers: make(map[string]struct{}), - bytecodeIdlers: make(map[string]struct{}), + statelessPeers: make(map[string]struct{}), + accountIdlers: make(map[string]struct{}), + storageIdlers: make(map[string]struct{}), + bytecodeIdlers: make(map[string]struct{}), + accessListIdlers: make(map[string]struct{}), - accountReqs: make(map[uint64]*accountRequest), - storageReqs: make(map[uint64]*storageRequest), - bytecodeReqs: make(map[uint64]*bytecodeRequest), - - trienodeHealIdlers: make(map[string]struct{}), - bytecodeHealIdlers: make(map[string]struct{}), - - trienodeHealReqs: make(map[uint64]*trienodeHealRequest), - bytecodeHealReqs: make(map[uint64]*bytecodeHealRequest), - trienodeHealThrottle: maxTrienodeHealThrottle, // Tune downward instead of insta-filling with junk - stateWriter: db.NewBatch(), + accountReqs: make(map[uint64]*accountRequest), + storageReqs: make(map[uint64]*storageRequest), + bytecodeReqs: make(map[uint64]*bytecodeRequest), + accessListReqs: make(map[uint64]*accessListRequest), extProgress: new(SyncProgress), } @@ -563,8 +434,7 @@ func (s *Syncer) Register(peer SyncPeer) error { s.accountIdlers[id] = struct{}{} s.storageIdlers[id] = struct{}{} s.bytecodeIdlers[id] = struct{}{} - s.trienodeHealIdlers[id] = struct{}{} - s.bytecodeHealIdlers[id] = struct{}{} + s.accessListIdlers[id] = struct{}{} s.lock.Unlock() // Notify any active syncs that a new peer can be assigned data @@ -591,8 +461,7 @@ func (s *Syncer) Unregister(id string) error { delete(s.accountIdlers, id) delete(s.storageIdlers, id) delete(s.bytecodeIdlers, id) - delete(s.trienodeHealIdlers, id) - delete(s.bytecodeHealIdlers, id) + delete(s.accessListIdlers, id) s.lock.Unlock() // Notify any active syncs that pending requests need to be reverted @@ -600,52 +469,41 @@ func (s *Syncer) Unregister(id string) error { return nil } +// errPivotStale is returned from download when the pivot has become stale +// and the syncer needs to perform access list catch-up before continuing. +var errPivotStale = errors.New("pivot stale") + // Sync starts (or resumes a previous) sync cycle to iterate over a state trie // with the given root and reconstruct the nodes based on the snapshot leaves. -// Previously downloaded segments will not be redownloaded of fixed, rather any -// errors will be healed after the leaves are fully accumulated. -func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error { - // Move the trie root from any previous value, revert stateless markers for - // any peers and initialize the syncer if it was not yet run +// The number parameter is the block number of the pivot block. +func (s *Syncer) Sync(root common.Hash, number uint64, cancel chan struct{}) error { s.lock.Lock() s.root = root - s.healer = &healTask{ - scheduler: state.NewStateSync(root, s.db, s.onHealState, s.scheme), - trieTasks: make(map[string]common.Hash), - codeTasks: make(map[common.Hash]struct{}), - } + s.number = number + s.previousRoot = root // Default: no pivot move. loadSyncStatus may overwrite. + s.previousNumber = number s.statelessPeers = make(map[string]struct{}) s.lock.Unlock() - if s.startTime.IsZero() { s.startTime = time.Now() } - // Retrieve the previous sync status from LevelDB and abort if already synced + + // Retrieve the previous sync status from DB. If there's no persisted + // status, sync is either fresh or already complete. s.loadSyncStatus() - if len(s.tasks) == 0 && s.healer.scheduler.Pending() == 0 { - log.Debug("Snapshot sync already completed") - return nil - } - defer func() { // Persist any progress, independent of failure - for _, task := range s.tasks { - s.forwardAccountTask(task) + var syncComplete bool + defer func() { + if !syncComplete { + for _, task := range s.tasks { + s.forwardAccountTask(task) + } + s.cleanAccountTasks() + s.saveSyncStatus() } - s.cleanAccountTasks() - s.saveSyncStatus() }() log.Debug("Starting snapshot sync cycle", "root", root) - - // Flush out the last committed raw states - defer func() { - if s.stateWriter.ValueSize() > 0 { - s.stateWriter.Write() - s.stateWriter.Reset() - } - }() defer s.report(true) - // commit any trie- and bytecode-healing data. - defer s.commitHealer(true) // Whether sync completed or not, disregard any future packets defer func() { @@ -654,84 +512,101 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error { s.accountReqs = make(map[uint64]*accountRequest) s.storageReqs = make(map[uint64]*storageRequest) s.bytecodeReqs = make(map[uint64]*bytecodeRequest) - s.trienodeHealReqs = make(map[uint64]*trienodeHealRequest) - s.bytecodeHealReqs = make(map[uint64]*bytecodeHealRequest) + s.accessListReqs = make(map[uint64]*accessListRequest) s.lock.Unlock() }() - // Keep scheduling sync tasks + + // Sync loop + log.Info("Starting state download", "root", root) + for { + // Download: fetch all required state data + err := s.download(cancel) + if err == errPivotStale { + // Pivot moved: catch up to new pivot + if err := s.catchUp(cancel); err != nil { + return err + } + s.resetDownload(root, number) + log.Info("Resuming state download", "root", root) + continue + } + + // Download error that isn't a stale pivot. This is typically due to + // the downloader cancelling the sync because the pivot moved. This + // error propagates to the downloader which will restart the sync with + // a new root. + if err != nil { + return err + } + log.Info("State download complete", "root", root) + + // Trie rebuild: build all tries from flat state and verify root + log.Info("Starting trie rebuild", "root", root) + if err := triedb.GenerateTrie(s.db, s.scheme, root); err != nil { + return err + } + log.Info("Trie rebuild complete", "root", root) + + // Sync complete: clear persisted status so we don't re-run. + // Set syncComplete to prevent the deferred saveSyncStatus from + // overwriting the nil. + syncComplete = true + rawdb.WriteSnapshotSyncStatus(s.db, nil) + return nil + } +} + +// download runs the bulk flat-state download. It fetches +// account ranges, storage slots, and bytecodes, writing flat state to disk. +func (s *Syncer) download(cancel chan struct{}) error { + // If the pivot moved since the last run (downloader cancelled and restarted + // us with a new root), signal catch-up before downloading. + if s.previousRoot != s.root { + return errPivotStale + } + + // Subscribe to peer events peerJoin := make(chan string, 16) peerJoinSub := s.peerJoin.Subscribe(peerJoin) defer peerJoinSub.Unsubscribe() - peerDrop := make(chan string, 16) peerDropSub := s.peerDrop.Subscribe(peerDrop) defer peerDropSub.Unsubscribe() - // Create a set of unique channels for this sync cycle. We need these to be - // ephemeral so a data race doesn't accidentally deliver something stale on - // a persistent channel across syncs (yup, this happened) + // Create ephemeral channels for this download cycle var ( - accountReqFails = make(chan *accountRequest) - storageReqFails = make(chan *storageRequest) - bytecodeReqFails = make(chan *bytecodeRequest) - accountResps = make(chan *accountResponse) - storageResps = make(chan *storageResponse) - bytecodeResps = make(chan *bytecodeResponse) - trienodeHealReqFails = make(chan *trienodeHealRequest) - bytecodeHealReqFails = make(chan *bytecodeHealRequest) - trienodeHealResps = make(chan *trienodeHealResponse) - bytecodeHealResps = make(chan *bytecodeHealResponse) + accountReqFails = make(chan *accountRequest) + storageReqFails = make(chan *storageRequest) + bytecodeReqFails = make(chan *bytecodeRequest) + accountResps = make(chan *accountResponse) + storageResps = make(chan *storageResponse) + bytecodeResps = make(chan *bytecodeResponse) ) for { - // Remove all completed tasks and terminate sync if everything's done + // Remove all completed tasks and terminate if everything's done s.cleanStorageTasks() s.cleanAccountTasks() - if len(s.tasks) == 0 && s.healer.scheduler.Pending() == 0 { - // State healing phase completed, record the elapsed time in metrics. - // Note: healing may be rerun in subsequent cycles to fill gaps between - // pivot states (e.g., if chain sync takes longer). - if !s.healStartTime.IsZero() { - stateHealTimeGauge.Inc(int64(time.Since(s.healStartTime))) - log.Info("State healing phase is completed", "elapsed", common.PrettyDuration(time.Since(s.healStartTime))) - s.healStartTime = time.Time{} - } + if len(s.tasks) == 0 { return nil } + // Assign all the data retrieval tasks to any free peers s.assignAccountTasks(accountResps, accountReqFails, cancel) s.assignBytecodeTasks(bytecodeResps, bytecodeReqFails, cancel) s.assignStorageTasks(storageResps, storageReqFails, cancel) - if len(s.tasks) == 0 { - // State sync phase completed, record the elapsed time in metrics. - // Note: the initial state sync runs only once, regardless of whether - // a new cycle is started later. Any state differences in subsequent - // cycles will be handled by the state healer. - s.syncTimeOnce.Do(func() { - stateSyncTimeGauge.Update(int64(time.Since(s.startTime))) - log.Info("State sync phase is completed", "elapsed", common.PrettyDuration(time.Since(s.startTime))) - }) - if s.healStartTime.IsZero() { - s.healStartTime = time.Now() - } - s.assignTrienodeHealTasks(trienodeHealResps, trienodeHealReqFails, cancel) - s.assignBytecodeHealTasks(bytecodeHealResps, bytecodeHealReqFails, cancel) - } // Update sync progress s.lock.Lock() s.extProgress = &SyncProgress{ - AccountSynced: s.accountSynced, - AccountBytes: s.accountBytes, - BytecodeSynced: s.bytecodeSynced, - BytecodeBytes: s.bytecodeBytes, - StorageSynced: s.storageSynced, - StorageBytes: s.storageBytes, - TrienodeHealSynced: s.trienodeHealSynced, - TrienodeHealBytes: s.trienodeHealBytes, - BytecodeHealSynced: s.bytecodeHealSynced, - BytecodeHealBytes: s.bytecodeHealBytes, + AccountSynced: s.accountSynced, + AccountBytes: s.accountBytes, + BytecodeSynced: s.bytecodeSynced, + BytecodeBytes: s.bytecodeBytes, + StorageSynced: s.storageSynced, + StorageBytes: s.storageBytes, } s.lock.Unlock() + // Wait for something to happen select { case <-s.update: @@ -749,10 +624,6 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error { s.revertBytecodeRequest(req) case req := <-storageReqFails: s.revertStorageRequest(req) - case req := <-trienodeHealReqFails: - s.revertTrienodeHealRequest(req) - case req := <-bytecodeHealReqFails: - s.revertBytecodeHealRequest(req) case res := <-accountResps: s.processAccountResponse(res) @@ -760,16 +631,259 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error { s.processBytecodeResponse(res) case res := <-storageResps: s.processStorageResponse(res) - case res := <-trienodeHealResps: - s.processTrienodeHealResponse(res) - case res := <-bytecodeHealResps: - s.processBytecodeHealResponse(res) } + // Report stats if something meaningful happened s.report(false) } } +// resetDownload resets the download state for a new pivot after catch-up. +// It regenerates the task list for accounts not yet downloaded, clears +// in-flight requests, and updates the root. +func (s *Syncer) resetDownload(root common.Hash, number uint64) { + s.lock.Lock() + s.root = root + s.number = number + s.previousRoot = root // Prevent download() from returning errPivotStale again + s.previousNumber = number + + // Clear stateless peers bc they may be able to serve the new pivot + s.statelessPeers = make(map[string]struct{}) + s.lock.Unlock() +} + +// catchUp runs the BAL catch-up. When the pivot has moved (previousRoot != +// root), it fetches BALs for the gap blocks, verifies them against +// block headers, and applies the diffs to roll flat state forward. +func (s *Syncer) catchUp(cancel chan struct{}) error { + s.lock.RLock() + from := s.previousNumber + 1 + to := s.number + s.lock.RUnlock() + + // The new pivot must be ahead of the old one. This can fail if a reorg + // replaced the block at the pivot height (same number, different root) + // or if a deep reorg shortened the chain past the old pivot. In either + // case, catch-up can't roll forward, so wipe progress and return an + // error so the caller restarts with a fresh sync. + // + // Note: this check lives here rather than in checkDeepReorg because + // catchUp is reached both when the downloader actively moves the pivot + // (via restartSnapSync) and when the syncer resumes from persisted + // progress after a restart. checkDeepReorg only covers the former. + if from > to { + log.Warn("Catch-up range inverted, wiping sync progress", "from", from, "to", to) + rawdb.WriteSnapshotSyncStatus(s.db, nil) + return fmt.Errorf("catch-up range inverted (from %d > to %d): pivot reorged", from, to) + } + log.Info("Starting access list catch-up", "from", from, "to", to, "blocks", to-from+1) + + // Collect block hashes for the gap range + hashes := make([]common.Hash, 0, to-from+1) + for num := from; num <= to; num++ { + hash := rawdb.ReadCanonicalHash(s.db, num) + if hash == (common.Hash{}) { + return fmt.Errorf("missing canonical hash for block %d during catch-up", num) + } + hashes = append(hashes, hash) + } + + // Fetch BALs from peers + rawBALs, err := s.fetchAccessLists(hashes, cancel) + if err != nil { + return err + } + + // Verify and apply each BAL in block order + for i, raw := range rawBALs { + num := from + uint64(i) + hash := hashes[i] + + // Decode the raw RLP into a BlockAccessList + var bal bal.BlockAccessList + if err := rlp.DecodeBytes(raw, &bal); err != nil { + return fmt.Errorf("failed to decode BAL for block %d: %v", num, err) + } + + // Verify against the block header + header := rawdb.ReadHeader(s.db, hash, num) + if header == nil { + return fmt.Errorf("missing header for block %d (hash %v) during catch-up", num, hash) + } + if err := verifyAccessList(&bal, header); err != nil { + return fmt.Errorf("BAL verification failed for block %d: %v", num, err) + } + + // Apply the state diffs + if err := s.applyAccessList(&bal); err != nil { + return fmt.Errorf("BAL application failed for block %d: %v", num, err) + } + } + log.Info("Access list catch-up complete", "blocks", len(rawBALs)) + return nil +} + +// fetchAccessLists fetches BALs for the given block hashes from +// remote peers. It runs its own event loop to assign requests +// to idle peers and process responses asynchronously. Results are returned in +// the same order as the input hashes. +func (s *Syncer) fetchAccessLists(hashes []common.Hash, cancel chan struct{}) ([]rlp.RawValue, error) { + log.Debug("Fetching access lists for catch-up", "blocks", len(hashes)) + + // Subscribe to peer events + peerJoin := make(chan string, 16) + peerJoinSub := s.peerJoin.Subscribe(peerJoin) + defer peerJoinSub.Unsubscribe() + peerDrop := make(chan string, 16) + peerDropSub := s.peerDrop.Subscribe(peerDrop) + defer peerDropSub.Unsubscribe() + + // pending = hashes not yet assigned to a peer, fetched = collected results. + pending := make(map[common.Hash]struct{}, len(hashes)) + for _, h := range hashes { + pending[h] = struct{}{} + } + fetched := make(map[common.Hash]rlp.RawValue, len(hashes)) + + // Create ephemeral channels for this fetch cycle + var ( + accessListReqFails = make(chan *accessListRequest) + accessListResps = make(chan *accessListResponse) + ) + for len(fetched) < len(hashes) { + // Assign access list retrieval tasks to idle peers + s.assignAccessListTasks(pending, accessListResps, accessListReqFails, cancel) + + // Wait for something to happen + select { + case <-s.update: + // Something happened (new peer, delivery, timeout), recheck + case <-peerJoin: + // A new peer joined, try to assign it work + case id := <-peerDrop: + // Re-add hashes from any requests for this peer + s.lock.Lock() + for _, req := range s.accessListReqs { + if req.peer == id { + for _, h := range req.hashes { + pending[h] = struct{}{} + } + } + } + s.lock.Unlock() + s.revertRequests(id) + case <-cancel: + return nil, ErrCancelled + + case req := <-accessListReqFails: + s.revertAccessListRequest(req) + for _, h := range req.hashes { + pending[h] = struct{}{} + } + case res := <-accessListResps: + s.processAccessListResponse(res, pending, fetched) + } + } + // Assemble results in input order + results := make([]rlp.RawValue, len(hashes)) + for i, h := range hashes { + results[i] = fetched[h] + } + return results, nil +} + +// assignAccessListTasks attempts to assign access list fetch requests to idle +// peers for any hashes still in pending. +func (s *Syncer) assignAccessListTasks(pending map[common.Hash]struct{}, success chan *accessListResponse, fail chan *accessListRequest, cancel chan struct{}) { + s.lock.Lock() + defer s.lock.Unlock() + idlers := s.sortIdlePeers(s.accessListIdlers, AccessListsMsg) + + // Iterate over pending hashes and assign to idle peers + for len(idlers.ids) > 0 && len(pending) > 0 { + var ( + idle = idlers.ids[0] + peer = s.peers[idle] + cap = idlers.caps[0] + ) + idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:] + + // Generate a unique request ID + var reqid uint64 + for { + reqid = uint64(rand.Int63()) + if reqid == 0 { + continue + } + if _, ok := s.accessListReqs[reqid]; ok { + continue + } + break + } + + // Collect hashes to fetch, capped by peer capacity and the + // EIP-8189 2 MiB response soft limit (~72 KiB/BAL -> 28 blocks). + if cap > maxAccessListRequestCount { + cap = maxAccessListRequestCount + } + batch := make([]common.Hash, 0, cap) + for h := range pending { + delete(pending, h) + batch = append(batch, h) + if len(batch) >= cap { + break + } + } + req := &accessListRequest{ + peer: idle, + id: reqid, + hashes: batch, + time: time.Now(), + deliver: success, + revert: fail, + cancel: cancel, + stale: make(chan struct{}), + } + req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() { + peer.Log().Debug("Access list request timed out", "reqid", reqid) + s.rates.Update(idle, AccessListsMsg, 0, 0) + s.scheduleRevertAccessListRequest(req) + }) + s.accessListReqs[reqid] = req + delete(s.accessListIdlers, idle) + + s.pend.Add(1) + go func() { + defer s.pend.Done() + + // Attempt to send the remote request and revert if it fails + if err := peer.RequestAccessLists(reqid, batch, softResponseLimit); err != nil { + log.Debug("Failed to request access lists", "err", err) + s.scheduleRevertAccessListRequest(req) + } + }() + } +} + +// processAccessListResponse handles a successful access list response by +// matching results to pending hashes and storing them. +func (s *Syncer) processAccessListResponse(res *accessListResponse, pending map[common.Hash]struct{}, fetched map[common.Hash]rlp.RawValue) { + // Each response entry corresponds to the requested hash at the same index + for i, raw := range res.accessLists { + if i >= len(res.req.hashes) { + break + } + h := res.req.hashes[i] + fetched[h] = raw + delete(pending, h) + } + // Re-add hashes that were not served back to pending + for i := len(res.accessLists); i < len(res.req.hashes); i++ { + pending[res.req.hashes[i]] = struct{}{} + } +} + // loadSyncStatus retrieves a previously aborted sync status from the database, // or generates a fresh one if none is available. func (s *Syncer) loadSyncStatus() { @@ -790,54 +904,18 @@ func (s *Syncer) loadSyncStatus() { task.stateCompleted[hash] = struct{}{} } task.StorageCompleted = nil - - // Allocate batch for account trie generation - task.genBatch = ethdb.HookedBatch{ - Batch: s.db.NewBatch(), - OnPut: func(key []byte, value []byte) { - s.accountBytes += common.StorageSize(len(key) + len(value)) - }, - } - if s.scheme == rawdb.HashScheme { - task.genTrie = newHashTrie(task.genBatch) - } - if s.scheme == rawdb.PathScheme { - task.genTrie = newPathTrie(common.Hash{}, task.Next != common.Hash{}, s.db, task.genBatch) - } - // Restore leftover storage tasks - for accountHash, subtasks := range task.SubTasks { - for _, subtask := range subtasks { - subtask.genBatch = ethdb.HookedBatch{ - Batch: s.db.NewBatch(), - OnPut: func(key []byte, value []byte) { - s.storageBytes += common.StorageSize(len(key) + len(value)) - }, - } - if s.scheme == rawdb.HashScheme { - subtask.genTrie = newHashTrie(subtask.genBatch) - } - if s.scheme == rawdb.PathScheme { - subtask.genTrie = newPathTrie(accountHash, subtask.Next != common.Hash{}, s.db, subtask.genBatch) - } - } - } } s.lock.Lock() defer s.lock.Unlock() - s.snapped = len(s.tasks) == 0 - + s.previousRoot = progress.Root + s.previousNumber = progress.BlockNumber s.accountSynced = progress.AccountSynced s.accountBytes = progress.AccountBytes s.bytecodeSynced = progress.BytecodeSynced s.bytecodeBytes = progress.BytecodeBytes s.storageSynced = progress.StorageSynced s.storageBytes = progress.StorageBytes - - s.trienodeHealSynced = progress.TrienodeHealSynced - s.trienodeHealBytes = progress.TrienodeHealBytes - s.bytecodeHealSynced = progress.BytecodeHealSynced - s.bytecodeHealBytes = progress.BytecodeHealBytes return } } @@ -848,8 +926,6 @@ func (s *Syncer) loadSyncStatus() { s.accountSynced, s.accountBytes = 0, 0 s.bytecodeSynced, s.bytecodeBytes = 0, 0 s.storageSynced, s.storageBytes = 0, 0 - s.trienodeHealSynced, s.trienodeHealBytes = 0, 0 - s.bytecodeHealSynced, s.bytecodeHealBytes = 0, 0 var next common.Hash step := new(big.Int).Sub( @@ -864,26 +940,11 @@ func (s *Syncer) loadSyncStatus() { // Make sure we don't overflow if the step is not a proper divisor last = common.MaxHash } - batch := ethdb.HookedBatch{ - Batch: s.db.NewBatch(), - OnPut: func(key []byte, value []byte) { - s.accountBytes += common.StorageSize(len(key) + len(value)) - }, - } - var tr genTrie - if s.scheme == rawdb.HashScheme { - tr = newHashTrie(batch) - } - if s.scheme == rawdb.PathScheme { - tr = newPathTrie(common.Hash{}, next != common.Hash{}, s.db, batch) - } s.tasks = append(s.tasks, &accountTask{ Next: next, Last: last, SubTasks: make(map[common.Hash][]*storageTask), - genBatch: batch, stateCompleted: make(map[common.Hash]struct{}), - genTrie: tr, }) log.Debug("Created account sync task", "from", next, "last", last) next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1)) @@ -894,23 +955,6 @@ func (s *Syncer) loadSyncStatus() { func (s *Syncer) saveSyncStatus() { // Serialize any partial progress to disk before spinning down for _, task := range s.tasks { - // Claim the right boundary as incomplete before flushing the - // accumulated nodes in batch, the nodes on right boundary - // will be discarded and cleaned up by this call. - task.genTrie.commit(false) - if err := task.genBatch.Write(); err != nil { - log.Error("Failed to persist account slots", "err", err) - } - for _, subtasks := range task.SubTasks { - for _, subtask := range subtasks { - // Same for account trie, discard and cleanup the - // incomplete right boundary. - subtask.genTrie.commit(false) - if err := subtask.genBatch.Write(); err != nil { - log.Error("Failed to persist storage slots", "err", err) - } - } - } // Save the account hashes of completed storage. task.StorageCompleted = make([]common.Hash, 0, len(task.stateCompleted)) for hash := range task.stateCompleted { @@ -922,17 +966,15 @@ func (s *Syncer) saveSyncStatus() { } // Store the actual progress markers progress := &SyncProgress{ - Tasks: s.tasks, - AccountSynced: s.accountSynced, - AccountBytes: s.accountBytes, - BytecodeSynced: s.bytecodeSynced, - BytecodeBytes: s.bytecodeBytes, - StorageSynced: s.storageSynced, - StorageBytes: s.storageBytes, - TrienodeHealSynced: s.trienodeHealSynced, - TrienodeHealBytes: s.trienodeHealBytes, - BytecodeHealSynced: s.bytecodeHealSynced, - BytecodeHealBytes: s.bytecodeHealBytes, + Root: s.root, + BlockNumber: s.number, + Tasks: s.tasks, + AccountSynced: s.accountSynced, + AccountBytes: s.accountBytes, + BytecodeSynced: s.bytecodeSynced, + BytecodeBytes: s.bytecodeBytes, + StorageSynced: s.storageSynced, + StorageBytes: s.storageBytes, } status, err := json.Marshal(progress) if err != nil { @@ -942,15 +984,10 @@ func (s *Syncer) saveSyncStatus() { } // Progress returns the snap sync status statistics. -func (s *Syncer) Progress() (*SyncProgress, *SyncPending) { +func (s *Syncer) Progress() *SyncProgress { s.lock.Lock() defer s.lock.Unlock() - pending := new(SyncPending) - if s.healer != nil { - pending.TrienodeHeal = uint64(len(s.healer.trieTasks)) - pending.BytecodeHeal = uint64(len(s.healer.codeTasks)) - } - return s.extProgress, pending + return s.extProgress } // cleanAccountTasks removes account range retrieval tasks that have already been @@ -967,13 +1004,8 @@ func (s *Syncer) cleanAccountTasks() { i-- } } - // If everything was just finalized just, generate the account trie and start heal + // If everything was just finalized, push the final sync report if len(s.tasks) == 0 { - s.lock.Lock() - s.snapped = true - s.lock.Unlock() - - // Push the final sync report s.reportSyncProgress(true) } } @@ -1022,23 +1054,10 @@ func (s *Syncer) assignAccountTasks(success chan *accountResponse, fail chan *ac defer s.lock.Unlock() // Sort the peers by download capacity to use faster ones if many available - idlers := &capacitySort{ - ids: make([]string, 0, len(s.accountIdlers)), - caps: make([]int, 0, len(s.accountIdlers)), - } - targetTTL := s.rates.TargetTimeout() - for id := range s.accountIdlers { - if _, ok := s.statelessPeers[id]; ok { - continue - } - idlers.ids = append(idlers.ids, id) - idlers.caps = append(idlers.caps, s.rates.Capacity(id, AccountRangeMsg, targetTTL)) - } + idlers := s.sortIdlePeers(s.accountIdlers, AccountRangeMsg) if len(idlers.ids) == 0 { return } - sort.Sort(sort.Reverse(idlers)) - // Iterate over all the tasks and try to find a pending one for _, task := range s.tasks { // Skip any tasks already filling @@ -1118,24 +1137,10 @@ func (s *Syncer) assignBytecodeTasks(success chan *bytecodeResponse, fail chan * s.lock.Lock() defer s.lock.Unlock() - // Sort the peers by download capacity to use faster ones if many available - idlers := &capacitySort{ - ids: make([]string, 0, len(s.bytecodeIdlers)), - caps: make([]int, 0, len(s.bytecodeIdlers)), - } - targetTTL := s.rates.TargetTimeout() - for id := range s.bytecodeIdlers { - if _, ok := s.statelessPeers[id]; ok { - continue - } - idlers.ids = append(idlers.ids, id) - idlers.caps = append(idlers.caps, s.rates.Capacity(id, ByteCodesMsg, targetTTL)) - } + idlers := s.sortIdlePeers(s.bytecodeIdlers, ByteCodesMsg) if len(idlers.ids) == 0 { return } - sort.Sort(sort.Reverse(idlers)) - // Iterate over all the tasks and try to find a pending one for _, task := range s.tasks { // Skip any tasks not in the bytecode retrieval phase @@ -1221,24 +1226,10 @@ func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *st s.lock.Lock() defer s.lock.Unlock() - // Sort the peers by download capacity to use faster ones if many available - idlers := &capacitySort{ - ids: make([]string, 0, len(s.storageIdlers)), - caps: make([]int, 0, len(s.storageIdlers)), - } - targetTTL := s.rates.TargetTimeout() - for id := range s.storageIdlers { - if _, ok := s.statelessPeers[id]; ok { - continue - } - idlers.ids = append(idlers.ids, id) - idlers.caps = append(idlers.caps, s.rates.Capacity(id, StorageRangesMsg, targetTTL)) - } + idlers := s.sortIdlePeers(s.storageIdlers, StorageRangesMsg) if len(idlers.ids) == 0 { return } - sort.Sort(sort.Reverse(idlers)) - // Iterate over all the tasks and try to find a pending one for _, task := range s.tasks { // Skip any tasks not in the storage retrieval phase @@ -1372,250 +1363,6 @@ func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *st } } -// assignTrienodeHealTasks attempts to match idle peers to trie node requests to -// heal any trie errors caused by the snap sync's chunked retrieval model. -func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fail chan *trienodeHealRequest, cancel chan struct{}) { - s.lock.Lock() - defer s.lock.Unlock() - - // Sort the peers by download capacity to use faster ones if many available - idlers := &capacitySort{ - ids: make([]string, 0, len(s.trienodeHealIdlers)), - caps: make([]int, 0, len(s.trienodeHealIdlers)), - } - targetTTL := s.rates.TargetTimeout() - for id := range s.trienodeHealIdlers { - if _, ok := s.statelessPeers[id]; ok { - continue - } - idlers.ids = append(idlers.ids, id) - idlers.caps = append(idlers.caps, s.rates.Capacity(id, TrieNodesMsg, targetTTL)) - } - if len(idlers.ids) == 0 { - return - } - sort.Sort(sort.Reverse(idlers)) - - // Iterate over pending tasks and try to find a peer to retrieve with - for len(s.healer.trieTasks) > 0 || s.healer.scheduler.Pending() > 0 { - // If there are not enough trie tasks queued to fully assign, fill the - // queue from the state sync scheduler. The trie synced schedules these - // together with bytecodes, so we need to queue them combined. - var ( - have = len(s.healer.trieTasks) + len(s.healer.codeTasks) - want = maxTrieRequestCount + maxCodeRequestCount - ) - if have < want { - paths, hashes, codes := s.healer.scheduler.Missing(want - have) - for i, path := range paths { - s.healer.trieTasks[path] = hashes[i] - } - for _, hash := range codes { - s.healer.codeTasks[hash] = struct{}{} - } - } - // If all the heal tasks are bytecodes or already downloading, bail - if len(s.healer.trieTasks) == 0 { - return - } - // Task pending retrieval, try to find an idle peer. If no such peer - // exists, we probably assigned tasks for all (or they are stateless). - // Abort the entire assignment mechanism. - if len(idlers.ids) == 0 { - return - } - var ( - idle = idlers.ids[0] - peer = s.peers[idle] - cap = idlers.caps[0] - ) - idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:] - - // Matched a pending task to an idle peer, allocate a unique request id - var reqid uint64 - for { - reqid = uint64(rand.Int63()) - if reqid == 0 { - continue - } - if _, ok := s.trienodeHealReqs[reqid]; ok { - continue - } - break - } - // Generate the network query and send it to the peer - if cap > maxTrieRequestCount { - cap = maxTrieRequestCount - } - cap = int(float64(cap) / s.trienodeHealThrottle) - if cap <= 0 { - cap = 1 - } - var ( - hashes = make([]common.Hash, 0, cap) - paths = make([]string, 0, cap) - pathsets = make([]TrieNodePathSet, 0, cap) - ) - for path, hash := range s.healer.trieTasks { - delete(s.healer.trieTasks, path) - - paths = append(paths, path) - hashes = append(hashes, hash) - if len(paths) >= cap { - break - } - } - // Group requests by account hash - paths, hashes, _, pathsets = sortByAccountPath(paths, hashes) - req := &trienodeHealRequest{ - peer: idle, - id: reqid, - time: time.Now(), - deliver: success, - revert: fail, - cancel: cancel, - stale: make(chan struct{}), - paths: paths, - hashes: hashes, - task: s.healer, - } - req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() { - peer.Log().Debug("Trienode heal request timed out", "reqid", reqid) - s.rates.Update(idle, TrieNodesMsg, 0, 0) - s.scheduleRevertTrienodeHealRequest(req) - }) - s.trienodeHealReqs[reqid] = req - delete(s.trienodeHealIdlers, idle) - - s.pend.Add(1) - go func(root common.Hash) { - defer s.pend.Done() - - // Attempt to send the remote request and revert if it fails - if err := peer.RequestTrieNodes(reqid, root, len(paths), pathsets, maxRequestSize); err != nil { - log.Debug("Failed to request trienode healers", "err", err) - s.scheduleRevertTrienodeHealRequest(req) - } - }(s.root) - } -} - -// assignBytecodeHealTasks attempts to match idle peers to bytecode requests to -// heal any trie errors caused by the snap sync's chunked retrieval model. -func (s *Syncer) assignBytecodeHealTasks(success chan *bytecodeHealResponse, fail chan *bytecodeHealRequest, cancel chan struct{}) { - s.lock.Lock() - defer s.lock.Unlock() - - // Sort the peers by download capacity to use faster ones if many available - idlers := &capacitySort{ - ids: make([]string, 0, len(s.bytecodeHealIdlers)), - caps: make([]int, 0, len(s.bytecodeHealIdlers)), - } - targetTTL := s.rates.TargetTimeout() - for id := range s.bytecodeHealIdlers { - if _, ok := s.statelessPeers[id]; ok { - continue - } - idlers.ids = append(idlers.ids, id) - idlers.caps = append(idlers.caps, s.rates.Capacity(id, ByteCodesMsg, targetTTL)) - } - if len(idlers.ids) == 0 { - return - } - sort.Sort(sort.Reverse(idlers)) - - // Iterate over pending tasks and try to find a peer to retrieve with - for len(s.healer.codeTasks) > 0 || s.healer.scheduler.Pending() > 0 { - // If there are not enough trie tasks queued to fully assign, fill the - // queue from the state sync scheduler. The trie synced schedules these - // together with trie nodes, so we need to queue them combined. - var ( - have = len(s.healer.trieTasks) + len(s.healer.codeTasks) - want = maxTrieRequestCount + maxCodeRequestCount - ) - if have < want { - paths, hashes, codes := s.healer.scheduler.Missing(want - have) - for i, path := range paths { - s.healer.trieTasks[path] = hashes[i] - } - for _, hash := range codes { - s.healer.codeTasks[hash] = struct{}{} - } - } - // If all the heal tasks are trienodes or already downloading, bail - if len(s.healer.codeTasks) == 0 { - return - } - // Task pending retrieval, try to find an idle peer. If no such peer - // exists, we probably assigned tasks for all (or they are stateless). - // Abort the entire assignment mechanism. - if len(idlers.ids) == 0 { - return - } - var ( - idle = idlers.ids[0] - peer = s.peers[idle] - cap = idlers.caps[0] - ) - idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:] - - // Matched a pending task to an idle peer, allocate a unique request id - var reqid uint64 - for { - reqid = uint64(rand.Int63()) - if reqid == 0 { - continue - } - if _, ok := s.bytecodeHealReqs[reqid]; ok { - continue - } - break - } - // Generate the network query and send it to the peer - if cap > maxCodeRequestCount { - cap = maxCodeRequestCount - } - hashes := make([]common.Hash, 0, cap) - for hash := range s.healer.codeTasks { - delete(s.healer.codeTasks, hash) - - hashes = append(hashes, hash) - if len(hashes) >= cap { - break - } - } - req := &bytecodeHealRequest{ - peer: idle, - id: reqid, - time: time.Now(), - deliver: success, - revert: fail, - cancel: cancel, - stale: make(chan struct{}), - hashes: hashes, - task: s.healer, - } - req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() { - peer.Log().Debug("Bytecode heal request timed out", "reqid", reqid) - s.rates.Update(idle, ByteCodesMsg, 0, 0) - s.scheduleRevertBytecodeHealRequest(req) - }) - s.bytecodeHealReqs[reqid] = req - delete(s.bytecodeHealIdlers, idle) - - s.pend.Add(1) - go func() { - defer s.pend.Done() - - // Attempt to send the remote request and revert if it fails - if err := peer.RequestByteCodes(reqid, hashes, maxRequestSize); err != nil { - log.Debug("Failed to request bytecode healers", "err", err) - s.scheduleRevertBytecodeHealRequest(req) - } - }() - } -} - // revertRequests locates all the currently pending requests from a particular // peer and reverts them, rescheduling for others to fulfill. func (s *Syncer) revertRequests(peer string) { @@ -1639,16 +1386,10 @@ func (s *Syncer) revertRequests(peer string) { storageReqs = append(storageReqs, req) } } - var trienodeHealReqs []*trienodeHealRequest - for _, req := range s.trienodeHealReqs { + var accessListReqs []*accessListRequest + for _, req := range s.accessListReqs { if req.peer == peer { - trienodeHealReqs = append(trienodeHealReqs, req) - } - } - var bytecodeHealReqs []*bytecodeHealRequest - for _, req := range s.bytecodeHealReqs { - if req.peer == peer { - bytecodeHealReqs = append(bytecodeHealReqs, req) + accessListReqs = append(accessListReqs, req) } } s.lock.Unlock() @@ -1663,11 +1404,8 @@ func (s *Syncer) revertRequests(peer string) { for _, req := range storageReqs { s.revertStorageRequest(req) } - for _, req := range trienodeHealReqs { - s.revertTrienodeHealRequest(req) - } - for _, req := range bytecodeHealReqs { - s.revertBytecodeHealRequest(req) + for _, req := range accessListReqs { + s.revertAccessListRequest(req) } } @@ -1810,9 +1548,12 @@ func (s *Syncer) revertStorageRequest(req *storageRequest) { } } -// scheduleRevertTrienodeHealRequest asks the event loop to clean up a trienode heal -// request and return all failed retrieval tasks to the scheduler for reassignment. -func (s *Syncer) scheduleRevertTrienodeHealRequest(req *trienodeHealRequest) { +// scheduleRevertAccessListRequest asks the event loop to clean up an access +// list request and return all failed retrieval tasks for reassignment. +// +// Note, this needs to run on the event runloop thread to reschedule to idle +// peers. On peer threads, use scheduleRevertAccessListRequest. +func (s *Syncer) scheduleRevertAccessListRequest(req *accessListRequest) { select { case req.revert <- req: // Sync event loop notified @@ -1823,16 +1564,13 @@ func (s *Syncer) scheduleRevertTrienodeHealRequest(req *trienodeHealRequest) { } } -// revertTrienodeHealRequest cleans up a trienode heal request and returns all +// revertAccessListRequest cleans up an access list request and returns all // failed retrieval tasks to the scheduler for reassignment. -// -// Note, this needs to run on the event runloop thread to reschedule to idle peers. -// On peer threads, use scheduleRevertTrienodeHealRequest. -func (s *Syncer) revertTrienodeHealRequest(req *trienodeHealRequest) { - log.Debug("Reverting trienode heal request", "peer", req.peer) +func (s *Syncer) revertAccessListRequest(req *accessListRequest) { + log.Debug("Reverting access list request", "peer", req.peer) select { case <-req.stale: - log.Trace("Trienode heal request already reverted", "peer", req.peer, "reqid", req.id) + log.Trace("Access list request already reverted", "peer", req.peer, "reqid", req.id) return default: } @@ -1841,63 +1579,14 @@ func (s *Syncer) revertTrienodeHealRequest(req *trienodeHealRequest) { // Remove the request from the tracked set and restore the peer to the // idle pool so it can be reassigned work (skip if peer already left). s.lock.Lock() - delete(s.trienodeHealReqs, req.id) + delete(s.accessListReqs, req.id) if _, ok := s.peers[req.peer]; ok { - s.trienodeHealIdlers[req.peer] = struct{}{} + s.accessListIdlers[req.peer] = struct{}{} } s.lock.Unlock() - // If there's a timeout timer still running, abort it and mark the trie node - // retrievals as not-pending, ready for rescheduling req.timeout.Stop() - for i, path := range req.paths { - req.task.trieTasks[path] = req.hashes[i] - } -} - -// scheduleRevertBytecodeHealRequest asks the event loop to clean up a bytecode heal -// request and return all failed retrieval tasks to the scheduler for reassignment. -func (s *Syncer) scheduleRevertBytecodeHealRequest(req *bytecodeHealRequest) { - select { - case req.revert <- req: - // Sync event loop notified - case <-req.cancel: - // Sync cycle got cancelled - case <-req.stale: - // Request already reverted - } -} - -// revertBytecodeHealRequest cleans up a bytecode heal request and returns all -// failed retrieval tasks to the scheduler for reassignment. -// -// Note, this needs to run on the event runloop thread to reschedule to idle peers. -// On peer threads, use scheduleRevertBytecodeHealRequest. -func (s *Syncer) revertBytecodeHealRequest(req *bytecodeHealRequest) { - log.Debug("Reverting bytecode heal request", "peer", req.peer) - select { - case <-req.stale: - log.Trace("Bytecode heal request already reverted", "peer", req.peer, "reqid", req.id) - return - default: - } - close(req.stale) - - // Remove the request from the tracked set and restore the peer to the - // idle pool so it can be reassigned work (skip if peer already left). - s.lock.Lock() - delete(s.bytecodeHealReqs, req.id) - if _, ok := s.peers[req.peer]; ok { - s.bytecodeHealIdlers[req.peer] = struct{}{} - } - s.lock.Unlock() - - // If there's a timeout timer still running, abort it and mark the code - // retrievals as not-pending, ready for rescheduling - req.timeout.Stop() - for _, hash := range req.hashes { - req.task.codeTasks[hash] = struct{}{} - } + // Hashes remain in the pending map and will be retried on the next loop iteration } // processAccountResponse integrates an already validated account range response @@ -2148,47 +1837,16 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { largeStorageGauge.Inc(1) } // Our first task is the one that was just filled by this response. - batch := ethdb.HookedBatch{ - Batch: s.db.NewBatch(), - OnPut: func(key []byte, value []byte) { - s.storageBytes += common.StorageSize(len(key) + len(value)) - }, - } - var tr genTrie - if s.scheme == rawdb.HashScheme { - tr = newHashTrie(batch) - } - if s.scheme == rawdb.PathScheme { - // Keep the left boundary as it's the first range. - tr = newPathTrie(account, false, s.db, batch) - } tasks = append(tasks, &storageTask{ - Next: common.Hash{}, - Last: r.End(), - root: acc.Root, - genBatch: batch, - genTrie: tr, + Next: common.Hash{}, + Last: r.End(), + root: acc.Root, }) for r.Next() { - batch := ethdb.HookedBatch{ - Batch: s.db.NewBatch(), - OnPut: func(key []byte, value []byte) { - s.storageBytes += common.StorageSize(len(key) + len(value)) - }, - } - var tr genTrie - if s.scheme == rawdb.HashScheme { - tr = newHashTrie(batch) - } - if s.scheme == rawdb.PathScheme { - tr = newPathTrie(account, true, s.db, batch) - } tasks = append(tasks, &storageTask{ - Next: r.Start(), - Last: r.End(), - root: acc.Root, - genBatch: batch, - genTrie: tr, + Next: r.Start(), + Last: r.End(), + root: acc.Root, }) } for _, task := range tasks { @@ -2232,60 +1890,11 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { // reconstructed later. slots += len(res.hashes[i]) - if i < len(res.hashes)-1 || res.subTask == nil { - // no need to make local reassignment of account: this closure does not outlive the loop - var tr genTrie - if s.scheme == rawdb.HashScheme { - tr = newHashTrie(batch) - } - if s.scheme == rawdb.PathScheme { - // Keep the left boundary as it's complete - tr = newPathTrie(account, false, s.db, batch) - } - for j := 0; j < len(res.hashes[i]); j++ { - tr.update(res.hashes[i][j][:], res.slots[i][j]) - } - tr.commit(true) - } // Persist the received storage segments. These flat state maybe // outdated during the sync, but it can be fixed later during the - // snapshot generation. + // trie rebuild. for j := 0; j < len(res.hashes[i]); j++ { rawdb.WriteStorageSnapshot(batch, account, res.hashes[i][j], res.slots[i][j]) - - // If we're storing large contracts, generate the trie nodes - // on the fly to not trash the gluing points - if i == len(res.hashes)-1 && res.subTask != nil { - res.subTask.genTrie.update(res.hashes[i][j][:], res.slots[i][j]) - } - } - } - // Large contracts could have generated new trie nodes, flush them to disk - if res.subTask != nil { - if res.subTask.done { - root := res.subTask.genTrie.commit(res.subTask.Last == common.MaxHash) - if err := res.subTask.genBatch.Write(); err != nil { - log.Error("Failed to persist stack slots", "err", err) - } - res.subTask.genBatch.Reset() - - // If the chunk's root is an overflown but full delivery, - // clear the heal request. - accountHash := res.accounts[len(res.accounts)-1] - if root == res.subTask.root && rawdb.HasTrieNode(s.db, accountHash, nil, root, s.scheme) { - for i, account := range res.mainTask.res.hashes { - if account == accountHash { - res.mainTask.needHeal[i] = false - skipStorageHealingGauge.Inc(1) - } - } - } - } else if res.subTask.genBatch.ValueSize() > batchSizeThreshold { - res.subTask.genTrie.commit(false) - if err := res.subTask.genBatch.Write(); err != nil { - log.Error("Failed to persist stack slots", "err", err) - } - res.subTask.genBatch.Reset() } } // Flush anything written just now and update the stats @@ -2306,128 +1915,6 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { // task assigners to pick up and fill. } -// processTrienodeHealResponse integrates an already validated trienode response -// into the healer tasks. -func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) { - var ( - start = time.Now() - fills int - ) - for i, hash := range res.hashes { - node := res.nodes[i] - - // If the trie node was not delivered, reschedule it - if node == nil { - res.task.trieTasks[res.paths[i]] = res.hashes[i] - continue - } - fills++ - - // Push the trie node into the state syncer - s.trienodeHealSynced++ - s.trienodeHealBytes += common.StorageSize(len(node)) - - err := s.healer.scheduler.ProcessNode(trie.NodeSyncResult{Path: res.paths[i], Data: node}) - switch err { - case nil: - case trie.ErrAlreadyProcessed: - s.trienodeHealDups++ - case trie.ErrNotRequested: - s.trienodeHealNops++ - default: - log.Error("Invalid trienode processed", "hash", hash, "err", err) - } - } - s.commitHealer(false) - - // Calculate the processing rate of one filled trie node - rate := float64(fills) / (float64(time.Since(start)) / float64(time.Second)) - - // Update the currently measured trienode queueing and processing throughput. - // - // The processing rate needs to be updated uniformly independent if we've - // processed 1x100 trie nodes or 100x1 to keep the rate consistent even in - // the face of varying network packets. As such, we cannot just measure the - // time it took to process N trie nodes and update once, we need one update - // per trie node. - // - // Naively, that would be: - // - // for i:=0; i time.Second { - // Periodically adjust the trie node throttler - if float64(pending) > 2*s.trienodeHealRate { - s.trienodeHealThrottle *= trienodeHealThrottleIncrease - } else { - s.trienodeHealThrottle /= trienodeHealThrottleDecrease - } - if s.trienodeHealThrottle > maxTrienodeHealThrottle { - s.trienodeHealThrottle = maxTrienodeHealThrottle - } else if s.trienodeHealThrottle < minTrienodeHealThrottle { - s.trienodeHealThrottle = minTrienodeHealThrottle - } - s.trienodeHealThrottled = time.Now() - - log.Debug("Updated trie node heal throttler", "rate", s.trienodeHealRate, "pending", pending, "throttle", s.trienodeHealThrottle) - } -} - -func (s *Syncer) commitHealer(force bool) { - if !force && s.healer.scheduler.MemSize() < ethdb.IdealBatchSize { - return - } - batch := s.db.NewBatch() - if err := s.healer.scheduler.Commit(batch); err != nil { - log.Crit("Failed to commit healing data", "err", err) - } - if err := batch.Write(); err != nil { - log.Crit("Failed to persist healing data", "err", err) - } - log.Debug("Persisted set of healing data", "type", "trienodes", "bytes", common.StorageSize(batch.ValueSize())) -} - -// processBytecodeHealResponse integrates an already validated bytecode response -// into the healer tasks. -func (s *Syncer) processBytecodeHealResponse(res *bytecodeHealResponse) { - for i, hash := range res.hashes { - node := res.codes[i] - - // If the trie node was not delivered, reschedule it - if node == nil { - res.task.codeTasks[hash] = struct{}{} - continue - } - // Push the trie node into the state syncer - s.bytecodeHealSynced++ - s.bytecodeHealBytes += common.StorageSize(len(node)) - - err := s.healer.scheduler.ProcessCode(trie.CodeSyncResult{Hash: hash, Data: node}) - switch err { - case nil: - case trie.ErrAlreadyProcessed: - s.bytecodeHealDups++ - case trie.ErrNotRequested: - s.bytecodeHealNops++ - default: - log.Error("Invalid bytecode processed", "hash", hash, "err", err) - } - } - s.commitHealer(false) -} - // forwardAccountTask takes a filled account task and persists anything available // into the database, after which it forwards the next account marker so that the // task's next chunk may be filled. @@ -2441,7 +1928,7 @@ func (s *Syncer) forwardAccountTask(task *accountTask) { // Persist the received account segments. These flat state maybe // outdated during the sync, but it can be fixed later during the - // snapshot generation. + // trie rebuild. oldAccountBytes := s.accountBytes batch := ethdb.HookedBatch{ @@ -2456,23 +1943,6 @@ func (s *Syncer) forwardAccountTask(task *accountTask) { } slim := types.SlimAccountRLP(*res.accounts[i]) rawdb.WriteAccountSnapshot(batch, hash, slim) - - if !task.needHeal[i] { - // If the storage task is complete, drop it into the stack trie - // to generate account trie nodes for it - full, err := types.FullAccountRLP(slim) // TODO(karalabe): Slim parsing can be omitted - if err != nil { - panic(err) // Really shouldn't ever happen - } - task.genTrie.update(hash[:], full) - } else { - // If the storage task is incomplete, explicitly delete the corresponding - // account item from the account trie to ensure that all nodes along the - // path to the incomplete storage trie are cleaned up. - if err := task.genTrie.delete(hash[:]); err != nil { - panic(err) // Really shouldn't ever happen - } - } } // Flush anything written just now and update the stats if err := batch.Write(); err != nil { @@ -2480,7 +1950,7 @@ func (s *Syncer) forwardAccountTask(task *accountTask) { } s.accountSynced += uint64(len(res.accounts)) - // Task filling persisted, push it the chunk marker forward to the first + // Task filling persisted, push the chunk marker forward to the first // account still missing data. for i, hash := range res.hashes { if task.needCode[i] || task.needState[i] { @@ -2500,22 +1970,6 @@ func (s *Syncer) forwardAccountTask(task *accountTask) { if task.done && len(task.stateCompleted) != 0 { panic(fmt.Errorf("storage completion flags should be emptied, %d left", len(task.stateCompleted))) } - // Stack trie could have generated trie nodes, push them to disk (we need to - // flush after finalizing task.done. It's fine even if we crash and lose this - // write as it will only cause more data to be downloaded during heal. - if task.done { - task.genTrie.commit(task.Last == common.MaxHash) - if err := task.genBatch.Write(); err != nil { - log.Error("Failed to persist stack account", "err", err) - } - task.genBatch.Reset() - } else if task.genBatch.ValueSize() > batchSizeThreshold { - task.genTrie.commit(false) - if err := task.genBatch.Write(); err != nil { - log.Error("Failed to persist stack account", "err", err) - } - task.genBatch.Reset() - } log.Debug("Persisted range of accounts", "accounts", len(res.accounts), "bytes", s.accountBytes-oldAccountBytes) } @@ -2622,14 +2076,7 @@ func (s *Syncer) OnAccounts(peer SyncPeer, id uint64, hashes []common.Hash, acco // OnByteCodes is a callback method to invoke when a batch of contract // bytes codes are received from a remote peer. func (s *Syncer) OnByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error { - s.lock.RLock() - syncing := !s.snapped - s.lock.RUnlock() - - if syncing { - return s.onByteCodes(peer, id, bytecodes) - } - return s.onHealByteCodes(peer, id, bytecodes) + return s.onByteCodes(peer, id, bytecodes) } // onByteCodes is a callback method to invoke when a batch of contract @@ -2879,15 +2326,22 @@ func (s *Syncer) OnStorage(peer SyncPeer, id uint64, hashes [][]common.Hash, slo return nil } -// OnTrieNodes is a callback method to invoke when a batch of trie nodes +// OnAccessLists is a callback method to invoke when a batch of access lists // are received from a remote peer. -func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error { +func (s *Syncer) OnAccessLists(peer SyncPeer, id uint64, accessLists rlp.RawList[rlp.RawValue]) error { + // Convert RawList to slice of raw values + bals, err := accessLists.Items() + if err != nil { + return err + } + + // Calculate total size of returned data var size common.StorageSize - for _, node := range trienodes { - size += common.StorageSize(len(node)) + for _, bal := range bals { + size += common.StorageSize(len(bal)) } logger := peer.Log().New("reqid", id) - logger.Trace("Delivering set of healing trienodes", "trienodes", len(trienodes), "bytes", size) + logger.Trace("Delivering set of BALs", "count", len(bals), "bytes", size) // Whether or not the response is valid, we can mark the peer as idle and // notify the scheduler to assign a new task. If the response is invalid, @@ -2896,7 +2350,7 @@ func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error s.lock.Lock() defer s.lock.Unlock() if _, ok := s.peers[peer.ID()]; ok { - s.trienodeHealIdlers[peer.ID()] = struct{}{} + s.accessListIdlers[peer.ID()] = struct{}{} } select { case s.update <- struct{}{}: @@ -2904,19 +2358,19 @@ func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error } }() s.lock.Lock() + // Ensure the response is for a valid request - req, ok := s.trienodeHealReqs[id] + req, ok := s.accessListReqs[id] if !ok { // Request stale, perhaps the peer timed out but came through in the end - logger.Warn("Unexpected trienode heal packet") + logger.Warn("Unexpected access list packet") s.lock.Unlock() return nil } - delete(s.trienodeHealReqs, id) - s.rates.Update(peer.ID(), TrieNodesMsg, time.Since(req.time), len(trienodes)) + delete(s.accessListReqs, id) + s.rates.Update(peer.ID(), AccessListsMsg, time.Since(req.time), len(bals)) - // Clean up the request timeout timer, we'll see how to proceed further based - // on the actual delivered content + // Clean up the request timeout timer if !req.timeout.Stop() { // The timeout is already triggered, and this request will be reverted+rescheduled s.lock.Unlock() @@ -2924,59 +2378,22 @@ func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error } // Response is valid, but check if peer is signalling that it does not have - // the requested data. For bytecode range queries that means the peer is not - // yet synced. - if len(trienodes) == 0 { - logger.Debug("Peer rejected trienode heal request") + // the requested data. + if len(bals) == 0 { + logger.Debug("Peer rejected access list request") s.statelessPeers[peer.ID()] = struct{}{} s.lock.Unlock() // Signal this request as failed, and ready for rescheduling - s.scheduleRevertTrienodeHealRequest(req) + s.scheduleRevertAccessListRequest(req) return nil } s.lock.Unlock() - // Cross reference the requested trienodes with the response to find gaps - // that the serving node is missing - var ( - hasher = crypto.NewKeccakState() - hash = make([]byte, 32) - nodes = make([][]byte, len(req.hashes)) - fills uint64 - ) - for i, j := 0, 0; i < len(trienodes); i++ { - // Find the next hash that we've been served, leaving misses with nils - hasher.Reset() - hasher.Write(trienodes[i]) - hasher.Read(hash) - - for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) { - j++ - } - if j < len(req.hashes) { - nodes[j] = trienodes[i] - fills++ - j++ - continue - } - // We've either ran out of hashes, or got unrequested data - logger.Warn("Unexpected healing trienodes", "count", len(trienodes)-i) - - // Signal this request as failed, and ready for rescheduling - s.scheduleRevertTrienodeHealRequest(req) - return errors.New("unexpected healing trienode") - } - // Response validated, send it to the scheduler for filling - s.trienodeHealPend.Add(fills) - defer func() { - s.trienodeHealPend.Add(^(fills - 1)) - }() - response := &trienodeHealResponse{ - paths: req.paths, - task: req.task, - hashes: req.hashes, - nodes: nodes, + // Response validated, send it to the scheduler for filling. + response := &accessListResponse{ + req: req, + accessLists: bals, } select { case req.deliver <- response: @@ -2986,141 +2403,12 @@ func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error return nil } -// onHealByteCodes is a callback method to invoke when a batch of contract -// bytes codes are received from a remote peer in the healing phase. -func (s *Syncer) onHealByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error { - var size common.StorageSize - for _, code := range bytecodes { - size += common.StorageSize(len(code)) - } - logger := peer.Log().New("reqid", id) - logger.Trace("Delivering set of healing bytecodes", "bytecodes", len(bytecodes), "bytes", size) - - // Whether or not the response is valid, we can mark the peer as idle and - // notify the scheduler to assign a new task. If the response is invalid, - // we'll drop the peer in a bit. - defer func() { - s.lock.Lock() - defer s.lock.Unlock() - if _, ok := s.peers[peer.ID()]; ok { - s.bytecodeHealIdlers[peer.ID()] = struct{}{} - } - select { - case s.update <- struct{}{}: - default: - } - }() - s.lock.Lock() - // Ensure the response is for a valid request - req, ok := s.bytecodeHealReqs[id] - if !ok { - // Request stale, perhaps the peer timed out but came through in the end - logger.Warn("Unexpected bytecode heal packet") - s.lock.Unlock() - return nil - } - delete(s.bytecodeHealReqs, id) - s.rates.Update(peer.ID(), ByteCodesMsg, time.Since(req.time), len(bytecodes)) - - // Clean up the request timeout timer, we'll see how to proceed further based - // on the actual delivered content - if !req.timeout.Stop() { - // The timeout is already triggered, and this request will be reverted+rescheduled - s.lock.Unlock() - return nil - } - - // Response is valid, but check if peer is signalling that it does not have - // the requested data. For bytecode range queries that means the peer is not - // yet synced. - if len(bytecodes) == 0 { - logger.Debug("Peer rejected bytecode heal request") - s.statelessPeers[peer.ID()] = struct{}{} - s.lock.Unlock() - - // Signal this request as failed, and ready for rescheduling - s.scheduleRevertBytecodeHealRequest(req) - return nil - } - s.lock.Unlock() - - // Cross reference the requested bytecodes with the response to find gaps - // that the serving node is missing - hasher := crypto.NewKeccakState() - hash := make([]byte, 32) - - codes := make([][]byte, len(req.hashes)) - for i, j := 0, 0; i < len(bytecodes); i++ { - // Find the next hash that we've been served, leaving misses with nils - hasher.Reset() - hasher.Write(bytecodes[i]) - hasher.Read(hash) - - for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) { - j++ - } - if j < len(req.hashes) { - codes[j] = bytecodes[i] - j++ - continue - } - // We've either ran out of hashes, or got unrequested data - logger.Warn("Unexpected healing bytecodes", "count", len(bytecodes)-i) - // Signal this request as failed, and ready for rescheduling - s.scheduleRevertBytecodeHealRequest(req) - return errors.New("unexpected healing bytecode") - } - // Response validated, send it to the scheduler for filling - response := &bytecodeHealResponse{ - task: req.task, - hashes: req.hashes, - codes: codes, - } - select { - case req.deliver <- response: - case <-req.cancel: - case <-req.stale: - } - return nil -} - -// onHealState is a callback method to invoke when a flat state(account -// or storage slot) is downloaded during the healing stage. The flat states -// can be persisted blindly and can be fixed later in the generation stage. -// Note it's not concurrent safe, please handle the concurrent issue outside. -func (s *Syncer) onHealState(paths [][]byte, value []byte) error { - if len(paths) == 1 { - var account types.StateAccount - if err := rlp.DecodeBytes(value, &account); err != nil { - return nil // Returning the error here would drop the remote peer - } - blob := types.SlimAccountRLP(account) - rawdb.WriteAccountSnapshot(s.stateWriter, common.BytesToHash(paths[0]), blob) - s.accountHealed += 1 - s.accountHealedBytes += common.StorageSize(1 + common.HashLength + len(blob)) - } - if len(paths) == 2 { - rawdb.WriteStorageSnapshot(s.stateWriter, common.BytesToHash(paths[0]), common.BytesToHash(paths[1]), value) - s.storageHealed += 1 - s.storageHealedBytes += common.StorageSize(1 + 2*common.HashLength + len(value)) - } - if s.stateWriter.ValueSize() > ethdb.IdealBatchSize { - s.stateWriter.Write() // It's fine to ignore the error here - s.stateWriter.Reset() - } - return nil -} - // hashSpace is the total size of the 256 bit hash space for accounts. var hashSpace = new(big.Int).Exp(common.Big2, common.Big256, nil) // report calculates various status reports and provides it to the user. func (s *Syncer) report(force bool) { - if len(s.tasks) > 0 { - s.reportSyncProgress(force) - return - } - s.reportHealProgress(force) + s.reportSyncProgress(force) } // reportSyncProgress calculates various status reports and provides it to the user. @@ -3169,25 +2457,6 @@ func (s *Syncer) reportSyncProgress(force bool) { "accounts", accounts, "slots", storage, "codes", bytecode, "eta", common.PrettyDuration(estTime-elapsed)) } -// reportHealProgress calculates various status reports and provides it to the user. -func (s *Syncer) reportHealProgress(force bool) { - // Don't report all the events, just occasionally - if !force && time.Since(s.logTime) < 8*time.Second { - return - } - s.logTime = time.Now() - - // Create a mega progress report - var ( - trienode = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.trienodeHealSynced), s.trienodeHealBytes.TerminalString()) - bytecode = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.bytecodeHealSynced), s.bytecodeHealBytes.TerminalString()) - accounts = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.accountHealed), s.accountHealedBytes.TerminalString()) - storage = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.storageHealed), s.storageHealedBytes.TerminalString()) - ) - log.Info("Syncing: state healing in progress", "accounts", accounts, "slots", storage, - "codes", bytecode, "nodes", trienode, "pending", s.healer.scheduler.Pending()) -} - // estimateRemainingSlots tries to determine roughly how many slots are left in // a contract storage, based on the number of keys and the last hash. This method // assumes that the hashes are lexicographically ordered and evenly distributed. @@ -3204,6 +2473,28 @@ func estimateRemainingSlots(hashes int, last common.Hash) (uint64, error) { return space.Uint64() - uint64(hashes), nil } +// sortIdlePeers builds a list of idle peers sorted by download capacity +// (highest first), filtering out stateless peers. Must be called with s.lock held. +func (s *Syncer) sortIdlePeers(idlerSet map[string]struct{}, msgCode uint64) *capacitySort { + idlers := &capacitySort{ + ids: make([]string, 0, len(idlerSet)), + caps: make([]int, 0, len(idlerSet)), + } + targetTTL := s.rates.TargetTimeout() + for id := range idlerSet { + if _, ok := s.statelessPeers[id]; ok { + continue + } + idlers.ids = append(idlers.ids, id) + idlers.caps = append(idlers.caps, s.rates.Capacity(id, msgCode, targetTTL)) + } + if len(idlers.ids) == 0 { + return idlers + } + sort.Sort(sort.Reverse(idlers)) + return idlers +} + // capacitySort implements the Sort interface, allowing sorting by peer message // throughput. Note, callers should use sort.Reverse to get the desired effect // of highest capacity being at the front. diff --git a/eth/protocols/snap/sync_test.go b/eth/protocols/snap/sync_test.go index c506488e91..813360fc61 100644 --- a/eth/protocols/snap/sync_test.go +++ b/eth/protocols/snap/sync_test.go @@ -32,6 +32,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/core/types/bal" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/crypto/keccak" "github.com/ethereum/go-ethereum/ethdb" @@ -120,10 +121,10 @@ func BenchmarkHashing(b *testing.B) { } type ( - accountHandlerFunc func(t *testPeer, requestId uint64, root common.Hash, origin common.Hash, limit common.Hash, cap int) error - storageHandlerFunc func(t *testPeer, requestId uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, max int) error - trieHandlerFunc func(t *testPeer, requestId uint64, root common.Hash, paths []TrieNodePathSet, cap int) error - codeHandlerFunc func(t *testPeer, id uint64, hashes []common.Hash, max int) error + accountHandlerFunc func(t *testPeer, requestId uint64, root common.Hash, origin common.Hash, limit common.Hash, cap int) error + storageHandlerFunc func(t *testPeer, requestId uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, max int) error + codeHandlerFunc func(t *testPeer, id uint64, hashes []common.Hash, max int) error + accessListHandlerFunc func(t *testPeer, id uint64, hashes []common.Hash, max int) error ) type testPeer struct { @@ -135,30 +136,31 @@ type testPeer struct { accountValues []*kv storageTries map[common.Hash]*trie.Trie storageValues map[common.Hash][]*kv + accessLists map[common.Hash]rlp.RawValue // block hash -> RLP-encoded BAL - accountRequestHandler accountHandlerFunc - storageRequestHandler storageHandlerFunc - trieRequestHandler trieHandlerFunc - codeRequestHandler codeHandlerFunc - term func() + accountRequestHandler accountHandlerFunc + storageRequestHandler storageHandlerFunc + codeRequestHandler codeHandlerFunc + accessListRequestHandler accessListHandlerFunc + term func() // counters - nAccountRequests atomic.Int64 - nStorageRequests atomic.Int64 - nBytecodeRequests atomic.Int64 - nTrienodeRequests atomic.Int64 + nAccountRequests atomic.Int64 + nStorageRequests atomic.Int64 + nBytecodeRequests atomic.Int64 + nAccessListRequests atomic.Int64 } func newTestPeer(id string, t *testing.T, term func()) *testPeer { peer := &testPeer{ - id: id, - test: t, - logger: log.New("id", id), - accountRequestHandler: defaultAccountRequestHandler, - trieRequestHandler: defaultTrieRequestHandler, - storageRequestHandler: defaultStorageRequestHandler, - codeRequestHandler: defaultCodeRequestHandler, - term: term, + id: id, + test: t, + logger: log.New("id", id), + accountRequestHandler: defaultAccountRequestHandler, + storageRequestHandler: defaultStorageRequestHandler, + codeRequestHandler: defaultCodeRequestHandler, + accessListRequestHandler: defaultAccessListRequestHandler, + term: term, } //stderrHandler := log.StreamHandler(os.Stderr, log.TerminalFormat(true)) //peer.logger.SetHandler(stderrHandler) @@ -176,11 +178,7 @@ func (t *testPeer) ID() string { return t.id } func (t *testPeer) Log() log.Logger { return t.logger } func (t *testPeer) Stats() string { - return fmt.Sprintf(`Account requests: %d -Storage requests: %d -Bytecode requests: %d -Trienode requests: %d -`, t.nAccountRequests.Load(), t.nStorageRequests.Load(), t.nBytecodeRequests.Load(), t.nTrienodeRequests.Load()) + return fmt.Sprintf(`Account requests: %d Storage requests: %d Bytecode requests: %d`, t.nAccountRequests, t.nStorageRequests, t.nBytecodeRequests) } func (t *testPeer) RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes int) error { @@ -190,13 +188,6 @@ func (t *testPeer) RequestAccountRange(id uint64, root, origin, limit common.Has return nil } -func (t *testPeer) RequestTrieNodes(id uint64, root common.Hash, count int, paths []TrieNodePathSet, bytes int) error { - t.logger.Trace("Fetching set of trie nodes", "reqid", id, "root", root, "pathsets", len(paths), "bytes", common.StorageSize(bytes)) - t.nTrienodeRequests.Add(1) - go t.trieRequestHandler(t, id, root, paths, bytes) - return nil -} - func (t *testPeer) RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, bytes int) error { t.nStorageRequests.Add(1) if len(accounts) == 1 && origin != nil { @@ -215,35 +206,14 @@ func (t *testPeer) RequestByteCodes(id uint64, hashes []common.Hash, bytes int) return nil } -// defaultTrieRequestHandler is a well-behaving handler for trie healing requests -func defaultTrieRequestHandler(t *testPeer, requestId uint64, root common.Hash, paths []TrieNodePathSet, cap int) error { - // Pass the response - var nodes [][]byte - for _, pathset := range paths { - switch len(pathset) { - case 1: - blob, _, err := t.accountTrie.GetNode(pathset[0]) - if err != nil { - t.logger.Info("Error handling req", "error", err) - break - } - nodes = append(nodes, blob) - default: - account := t.storageTries[(common.BytesToHash(pathset[0]))] - for _, path := range pathset[1:] { - blob, _, err := account.GetNode(path) - if err != nil { - t.logger.Info("Error handling req", "error", err) - break - } - nodes = append(nodes, blob) - } - } - } - t.remote.OnTrieNodes(t, requestId, nodes) +func (t *testPeer) RequestAccessLists(id uint64, hashes []common.Hash, bytes int) error { + t.nAccessListRequests++ + t.logger.Trace("Fetching set of BALs", "reqid", id, "hashes", len(hashes), "bytes", common.StorageSize(bytes)) + go t.accessListRequestHandler(t, id, hashes, bytes) return nil } +// defaultTrieRequestHandler is a well-behaving handler for trie healing requests // defaultAccountRequestHandler is a well-behaving handler for AccountRangeRequests func defaultAccountRequestHandler(t *testPeer, id uint64, root common.Hash, origin common.Hash, limit common.Hash, cap int) error { keys, vals, proofs := createAccountRequestResponse(t, root, origin, limit, cap) @@ -312,6 +282,25 @@ func defaultCodeRequestHandler(t *testPeer, id uint64, hashes []common.Hash, max return nil } +// defaultAccessListRequestHandler serves BALs from the peer's accessLists map. +// If the peer has no BAL data, it returns empty (peer rejection). +func defaultAccessListRequestHandler(t *testPeer, id uint64, hashes []common.Hash, max int) error { + var results []rlp.RawValue + if t.accessLists != nil { + for _, h := range hashes { + if raw, ok := t.accessLists[h]; ok { + results = append(results, raw) + } + } + } + rawList, _ := rlp.EncodeToRawList(results) + if err := t.remote.OnAccessLists(t, id, rawList); err != nil { + t.test.Errorf("Remote side rejected our delivery: %v", err) + t.term() + } + return nil +} + func createStorageRequestResponse(t *testPeer, root common.Hash, accounts []common.Hash, origin, limit []byte, max int) (hashes [][]common.Hash, slots [][][]byte, proofs [][]byte) { var size int for _, account := range accounts { @@ -443,15 +432,6 @@ func nonResponsiveRequestAccountRangeFn(t *testPeer, requestId uint64, root comm return nil } -func emptyTrieRequestHandler(t *testPeer, requestId uint64, root common.Hash, paths []TrieNodePathSet, cap int) error { - t.remote.OnTrieNodes(t, requestId, nil) - return nil -} - -func nonResponsiveTrieRequestHandler(t *testPeer, requestId uint64, root common.Hash, paths []TrieNodePathSet, cap int) error { - return nil -} - func emptyStorageRequestHandler(t *testPeer, requestId uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, max int) error { t.remote.OnStorage(t, requestId, nil, nil, nil) return nil @@ -470,12 +450,6 @@ func proofHappyStorageRequestHandler(t *testPeer, requestId uint64, root common. return nil } -//func emptyCodeRequestHandler(t *testPeer, id uint64, hashes []common.Hash, max uint64) error { -// var bytecodes [][]byte -// t.remote.OnByteCodes(t, id, bytecodes) -// return nil -//} - func corruptCodeRequestHandler(t *testPeer, id uint64, hashes []common.Hash, max int) error { var bytecodes [][]byte for _, h := range hashes { @@ -618,7 +592,7 @@ func testSyncBloatedProof(t *testing.T, scheme string) { return nil } syncer := setupSyncer(nodeScheme, source) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err == nil { + if err := syncer.Sync(sourceAccountTrie.Hash(), 0, cancel); err == nil { t.Fatal("No error returned from incomplete/cancelled sync") } } @@ -660,7 +634,7 @@ func testSync(t *testing.T, scheme string) { return source } syncer := setupSyncer(nodeScheme, mkSource("source")) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(sourceAccountTrie.Hash(), 0, cancel); err != nil { t.Fatalf("sync failed: %v", err) } verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) @@ -695,7 +669,7 @@ func testSyncTinyTriePanic(t *testing.T, scheme string) { } syncer := setupSyncer(nodeScheme, mkSource("source")) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(sourceAccountTrie.Hash(), 0, cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) @@ -730,7 +704,7 @@ func testMultiSync(t *testing.T, scheme string) { } syncer := setupSyncer(nodeScheme, mkSource("sourceA"), mkSource("sourceB")) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(sourceAccountTrie.Hash(), 0, cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) @@ -767,7 +741,7 @@ func testSyncWithStorage(t *testing.T, scheme string) { } syncer := setupSyncer(scheme, mkSource("sourceA")) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(sourceAccountTrie.Hash(), 0, cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) @@ -794,7 +768,7 @@ func testMultiSyncManyUseless(t *testing.T, scheme string) { ) sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(scheme, 100, 3000, true, false, false) - mkSource := func(name string, noAccount, noStorage, noTrieNode bool) *testPeer { + mkSource := func(name string, noAccount, noStorage bool) *testPeer { source := newTestPeer(name, t, term) source.accountTrie = sourceAccountTrie.Copy() source.accountValues = elems @@ -807,21 +781,17 @@ func testMultiSyncManyUseless(t *testing.T, scheme string) { if !noStorage { source.storageRequestHandler = emptyStorageRequestHandler } - if !noTrieNode { - source.trieRequestHandler = emptyTrieRequestHandler - } return source } syncer := setupSyncer( scheme, - mkSource("full", true, true, true), - mkSource("noAccounts", false, true, true), - mkSource("noStorage", true, false, true), - mkSource("noTrie", true, true, false), + mkSource("full", true, true), + mkSource("noAccounts", false, true), + mkSource("noStorage", true, false), ) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(sourceAccountTrie.Hash(), 0, cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) @@ -848,7 +818,7 @@ func testMultiSyncManyUselessWithLowTimeout(t *testing.T, scheme string) { ) sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(scheme, 100, 3000, true, false, false) - mkSource := func(name string, noAccount, noStorage, noTrieNode bool) *testPeer { + mkSource := func(name string, noAccount, noStorage bool) *testPeer { source := newTestPeer(name, t, term) source.accountTrie = sourceAccountTrie.Copy() source.accountValues = elems @@ -861,18 +831,14 @@ func testMultiSyncManyUselessWithLowTimeout(t *testing.T, scheme string) { if !noStorage { source.storageRequestHandler = emptyStorageRequestHandler } - if !noTrieNode { - source.trieRequestHandler = emptyTrieRequestHandler - } return source } syncer := setupSyncer( scheme, - mkSource("full", true, true, true), - mkSource("noAccounts", false, true, true), - mkSource("noStorage", true, false, true), - mkSource("noTrie", true, true, false), + mkSource("full", true, true), + mkSource("noAccounts", false, true), + mkSource("noStorage", true, false), ) // We're setting the timeout to very low, to increase the chance of the timeout // being triggered. This was previously a cause of panic, when a response @@ -880,7 +846,7 @@ func testMultiSyncManyUselessWithLowTimeout(t *testing.T, scheme string) { syncer.rates.OverrideTTLLimit = time.Millisecond done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(sourceAccountTrie.Hash(), 0, cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) @@ -907,7 +873,7 @@ func testMultiSyncManyUnresponsive(t *testing.T, scheme string) { ) sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(scheme, 100, 3000, true, false, false) - mkSource := func(name string, noAccount, noStorage, noTrieNode bool) *testPeer { + mkSource := func(name string, noAccount, noStorage bool) *testPeer { source := newTestPeer(name, t, term) source.accountTrie = sourceAccountTrie.Copy() source.accountValues = elems @@ -920,24 +886,20 @@ func testMultiSyncManyUnresponsive(t *testing.T, scheme string) { if !noStorage { source.storageRequestHandler = nonResponsiveStorageRequestHandler } - if !noTrieNode { - source.trieRequestHandler = nonResponsiveTrieRequestHandler - } return source } syncer := setupSyncer( scheme, - mkSource("full", true, true, true), - mkSource("noAccounts", false, true, true), - mkSource("noStorage", true, false, true), - mkSource("noTrie", true, true, false), + mkSource("full", true, true), + mkSource("noAccounts", false, true), + mkSource("noStorage", true, false), ) // We're setting the timeout to very low, to make the test run a bit faster syncer.rates.OverrideTTLLimit = time.Millisecond done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(sourceAccountTrie.Hash(), 0, cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) @@ -991,7 +953,7 @@ func testSyncBoundaryAccountTrie(t *testing.T, scheme string) { mkSource("peer-b"), ) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(sourceAccountTrie.Hash(), 0, cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) @@ -1038,7 +1000,7 @@ func testSyncNoStorageAndOneCappedPeer(t *testing.T, scheme string) { mkSource("capped", true), ) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(sourceAccountTrie.Hash(), 0, cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) @@ -1083,7 +1045,7 @@ func testSyncNoStorageAndOneCodeCorruptPeer(t *testing.T, scheme string) { mkSource("corrupt", corruptCodeRequestHandler), ) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(sourceAccountTrie.Hash(), 0, cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) @@ -1126,7 +1088,7 @@ func testSyncNoStorageAndOneAccountCorruptPeer(t *testing.T, scheme string) { mkSource("corrupt", corruptAccountRequestHandler), ) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(sourceAccountTrie.Hash(), 0, cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) @@ -1172,7 +1134,7 @@ func testSyncNoStorageAndOneCodeCappedPeer(t *testing.T, scheme string) { }), ) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(sourceAccountTrie.Hash(), 0, cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) @@ -1224,7 +1186,7 @@ func testSyncBoundaryStorageTrie(t *testing.T, scheme string) { mkSource("peer-b"), ) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(sourceAccountTrie.Hash(), 0, cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) @@ -1271,7 +1233,7 @@ func testSyncWithStorageAndOneCappedPeer(t *testing.T, scheme string) { mkSource("slow", true), ) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(sourceAccountTrie.Hash(), 0, cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) @@ -1317,7 +1279,7 @@ func testSyncWithStorageAndCorruptPeer(t *testing.T, scheme string) { mkSource("corrupt", corruptStorageRequestHandler), ) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(sourceAccountTrie.Hash(), 0, cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) @@ -1360,7 +1322,7 @@ func testSyncWithStorageAndNonProvingPeer(t *testing.T, scheme string) { mkSource("corrupt", noProofStorageRequestHandler), ) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(sourceAccountTrie.Hash(), 0, cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) @@ -1400,7 +1362,7 @@ func testSyncWithStorageMisbehavingProve(t *testing.T, scheme string) { return source } syncer := setupSyncer(nodeScheme, mkSource("sourceA")) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(sourceAccountTrie.Hash(), 0, cancel); err != nil { t.Fatalf("sync failed: %v", err) } verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) @@ -1439,7 +1401,7 @@ func testSyncWithUnevenStorage(t *testing.T, scheme string) { return source } syncer := setupSyncer(scheme, mkSource("source")) - if err := syncer.Sync(accountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(accountTrie.Hash(), 0, cancel); err != nil { t.Fatalf("sync failed: %v", err) } verifyTrie(scheme, syncer.db, accountTrie.Hash(), t) @@ -1521,6 +1483,42 @@ func makeAccountTrieNoStorage(n int, scheme string) (string, *trie.Trie, []*kv) return db.Scheme(), accTrie, entries } +// makeAccountTrieWithAddresses creates an account trie keyed by keccak(address), +// matching production behavior. Returns the trie, sorted entries, and the +// addresses used. This allows BAL-based tests to target specific addresses and +// have applyAccessList write to the same snapshot keys as the download. +func makeAccountTrieWithAddresses(n int, scheme string) (string, *trie.Trie, []*kv, []common.Address) { + var ( + db = triedb.NewDatabase(rawdb.NewMemoryDatabase(), newDbConfig(scheme)) + accTrie = trie.NewEmpty(db) + entries []*kv + addrs []common.Address + ) + for i := uint64(1); i <= uint64(n); i++ { + // Deterministic address from index + addr := common.BigToAddress(new(big.Int).SetUint64(i)) + addrs = append(addrs, addr) + + value, _ := rlp.EncodeToBytes(&types.StateAccount{ + Nonce: i, + Balance: uint256.NewInt(i), + Root: types.EmptyRootHash, + CodeHash: types.EmptyCodeHash[:], + }) + key := crypto.Keccak256(addr[:]) + elem := &kv{key, value} + accTrie.MustUpdate(elem.k, elem.v) + entries = append(entries, elem) + } + slices.SortFunc(entries, (*kv).cmp) + + root, nodes := accTrie.Commit(false) + db.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), triedb.NewStateSet()) + + accTrie, _ = trie.New(trie.StateTrieID(root), db) + return db.Scheme(), accTrie, entries, addrs +} + // makeBoundaryAccountTrie constructs an account trie. Instead of filling // accounts normally, this function will fill a few accounts which have // boundary hash. @@ -1859,55 +1857,6 @@ func verifyTrie(scheme string, db ethdb.KeyValueStore, root common.Hash, t *test t.Logf("accounts: %d, slots: %d", accounts, slots) } -// TestSyncAccountPerformance tests how efficient the snap algo is at minimizing -// state healing -func TestSyncAccountPerformance(t *testing.T) { - // These tests must not run in parallel: they modify the - // global var accountConcurrency - // t.Parallel() - testSyncAccountPerformance(t, rawdb.HashScheme) - testSyncAccountPerformance(t, rawdb.PathScheme) -} - -func testSyncAccountPerformance(t *testing.T, scheme string) { - // Set the account concurrency to 1. This _should_ result in the - // range root to become correct, and there should be no healing needed - defer func(old int) { accountConcurrency = old }(accountConcurrency) - accountConcurrency = 1 - - var ( - once sync.Once - cancel = make(chan struct{}) - term = func() { - once.Do(func() { - close(cancel) - }) - } - ) - nodeScheme, sourceAccountTrie, elems := makeAccountTrieNoStorage(100, scheme) - - mkSource := func(name string) *testPeer { - source := newTestPeer(name, t, term) - source.accountTrie = sourceAccountTrie.Copy() - source.accountValues = elems - return source - } - src := mkSource("source") - syncer := setupSyncer(nodeScheme, src) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { - t.Fatalf("sync failed: %v", err) - } - verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) - // The trie root will always be requested, since it is added when the snap - // sync cycle starts. When popping the queue, we do not look it up again. - // Doing so would bring this number down to zero in this artificial testcase, - // but only add extra IO for no reason in practice. - if have, want := src.nTrienodeRequests.Load(), int64(1); have != want { - fmt.Print(src.Stats()) - t.Errorf("trie node heal requests wrong, want %d, have %d", want, have) - } -} - func TestSlotEstimation(t *testing.T) { for i, tc := range []struct { last common.Hash @@ -1958,6 +1907,799 @@ func TestSlotEstimation(t *testing.T) { } } +// TestPivotMoveDetection verifies that when the syncer is restarted with a +// different root (simulating the downloader's cancel+restart on pivot move), +// download() returns errPivotStale immediately. +func TestPivotMoveDetection(t *testing.T) { + t.Parallel() + + rootA := common.HexToHash("0xaaaa") + rootB := common.HexToHash("0xbbbb") + + db := rawdb.NewMemoryDatabase() + syncer := NewSyncer(db, rawdb.HashScheme) + + // Simulate a previous sync run against rootA with some pending tasks + syncer.root = rootA + syncer.tasks = []*accountTask{ + {Next: common.Hash{}, Last: common.MaxHash, SubTasks: make(map[common.Hash][]*storageTask), stateCompleted: make(map[common.Hash]struct{})}, + } + syncer.saveSyncStatus() + + // Simulate downloader restarting us with rootB (as Sync() would do) + syncer.root = rootB + syncer.previousRoot = rootB // Sync() sets this as default + syncer.loadSyncStatus() // Overwrites previousRoot with persisted rootA + + if syncer.previousRoot != rootA { + t.Fatalf("previousRoot mismatch: got %v, want %v", syncer.previousRoot, rootA) + } + if syncer.root != rootB { + t.Fatalf("root mismatch: got %v, want %v", syncer.root, rootB) + } + // download() should detect the mismatch and return errPivotStale + cancel := make(chan struct{}) + err := syncer.download(cancel) + if err != errPivotStale { + t.Fatalf("expected errPivotStale, got %v", err) + } +} + +// TestNoPivotMoveOnSameRoot verifies that when the syncer is restarted with +// the same root, download() does not return errPivotStale. +func TestNoPivotMoveOnSameRoot(t *testing.T) { + t.Parallel() + + rootA := common.HexToHash("0xaaaa") + + db := rawdb.NewMemoryDatabase() + syncer := NewSyncer(db, rawdb.HashScheme) + + // Simulate a previous sync run against rootA + syncer.root = rootA + syncer.tasks = []*accountTask{ + {Next: common.Hash{}, Last: common.MaxHash, SubTasks: make(map[common.Hash][]*storageTask), stateCompleted: make(map[common.Hash]struct{})}, + } + syncer.saveSyncStatus() + + // Simulate restart with the same root + syncer.root = rootA + syncer.previousRoot = rootA + syncer.loadSyncStatus() + + if syncer.previousRoot != rootA { + t.Fatalf("previousRoot mismatch: got %v, want %v", syncer.previousRoot, rootA) + } + // previousRoot == root, so no pivot move detected + if syncer.previousRoot != syncer.root { + t.Fatalf("expected previousRoot == root, got %v != %v", syncer.previousRoot, syncer.root) + } +} + +// TestCatchUpInvertedRange verifies that catchUp returns an error and wipes +// sync progress when the new pivot is at the same (or lower) block number as +// the old pivot.. +func TestCatchUpInvertedRange(t *testing.T) { + t.Parallel() + db := rawdb.NewMemoryDatabase() + syncer := NewSyncer(db, rawdb.HashScheme) + + // Simulate: old pivot at block 100, new pivot at block 100 (same number, + // different root). This happens when a reorg replaces the pivot block. + syncer.previousNumber = 100 + syncer.number = 100 + + // Write some sync progress so we can verify it gets wiped + rawdb.WriteSnapshotSyncStatus(db, []byte("some progress")) + cancel := make(chan struct{}) + err := syncer.catchUp(cancel) + if err == nil { + t.Fatal("expected error from catchUp with inverted range") + } + + // Verify sync progress was wiped + if status := rawdb.ReadSnapshotSyncStatus(db); status != nil { + t.Fatal("sync progress should be wiped after inverted catch-up range") + } +} + +// TestFlatStateDownload verifies that download() writes flat state to disk +// and makes no trie node requests. +func TestFlatStateDownload(t *testing.T) { + t.Parallel() + testFlatStateDownload(t, rawdb.HashScheme) + testFlatStateDownload(t, rawdb.PathScheme) +} + +func testFlatStateDownload(t *testing.T, scheme string) { + var ( + once sync.Once + cancel = make(chan struct{}) + term = func() { + once.Do(func() { + close(cancel) + }) + } + ) + nodeScheme, sourceAccountTrie, elems := makeAccountTrieNoStorage(100, scheme) + mkSource := func(name string) *testPeer { + source := newTestPeer(name, t, term) + source.accountTrie = sourceAccountTrie.Copy() + source.accountValues = elems + return source + } + syncer := setupSyncer(nodeScheme, mkSource("source")) + + // Call download() directly to avoid rebuildTrie + syncer.root = sourceAccountTrie.Hash() + syncer.previousRoot = syncer.root // No pivot move + syncer.loadSyncStatus() + if err := syncer.download(cancel); err != nil { + t.Fatalf("download failed: %v", err) + } + + // Verify flat state was written + for _, entry := range elems { + hash := common.BytesToHash(entry.k) + data := rawdb.ReadAccountSnapshot(syncer.db, hash) + if len(data) == 0 { + t.Errorf("missing account snapshot for %x", hash) + } + } +} + +// TestInterruptedDownloadRecovery verifies that partially completed download +// state is persisted and resumed on restart. +func TestInterruptedDownloadRecovery(t *testing.T) { + t.Parallel() + testInterruptedDownloadRecovery(t, rawdb.HashScheme) + testInterruptedDownloadRecovery(t, rawdb.PathScheme) +} + +func testInterruptedDownloadRecovery(t *testing.T, scheme string) { + nodeScheme, sourceAccountTrie, elems := makeAccountTrieNoStorage(100, scheme) + root := sourceAccountTrie.Hash() + + // Cancel after exactly 2 account range responses, guaranteeing partial + // completion without any timing dependency. + var ( + once1 sync.Once + cancel1 = make(chan struct{}) + term1 = func() { once1.Do(func() { close(cancel1) }) } + responses atomic.Int32 + ) + cancelAfterHandler := func(tp *testPeer, id uint64, root common.Hash, origin common.Hash, limit common.Hash, cap int) error { + if responses.Add(1) > 2 { + term1() + return nil + } + return defaultAccountRequestHandler(tp, id, root, origin, limit, cap) + } + db := rawdb.NewMemoryDatabase() + syncer1 := NewSyncer(db, nodeScheme) + src1 := newTestPeer("source1", t, term1) + src1.accountTrie = sourceAccountTrie.Copy() + src1.accountValues = elems + src1.accountRequestHandler = cancelAfterHandler + syncer1.Register(src1) + src1.remote = syncer1 + syncer1.root = root + syncer1.previousRoot = root + syncer1.loadSyncStatus() + syncer1.download(cancel1) + + // Save progress + for _, task := range syncer1.tasks { + syncer1.forwardAccountTask(task) + } + syncer1.cleanAccountTasks() + syncer1.saveSyncStatus() + + // Count how many accounts were downloaded in the first run. + // Due to the async nature of response processing, the cancel may race + // with delivery so 0 accounts may be written. + firstRunCount := 0 + for _, entry := range elems { + if data := rawdb.ReadAccountSnapshot(db, common.BytesToHash(entry.k)); len(data) > 0 { + firstRunCount++ + } + } + if firstRunCount == len(elems) { + t.Fatal("first run should not have downloaded everything") + } + + // Second run: resume with same root, should complete the download + var ( + once2 sync.Once + cancel2 = make(chan struct{}) + term2 = func() { once2.Do(func() { close(cancel2) }) } + ) + syncer2 := NewSyncer(db, nodeScheme) + src2 := newTestPeer("source2", t, term2) + src2.accountTrie = sourceAccountTrie.Copy() + src2.accountValues = elems + syncer2.Register(src2) + src2.remote = syncer2 + syncer2.root = root + syncer2.previousRoot = root + syncer2.loadSyncStatus() + if err := syncer2.download(cancel2); err != nil { + t.Fatalf("resumed download failed: %v", err) + } + + // Verify all accounts are now present + for _, entry := range elems { + if data := rawdb.ReadAccountSnapshot(db, common.BytesToHash(entry.k)); len(data) == 0 { + t.Errorf("missing account after resumed download: %x", entry.k) + } + } +} + +// TestPivotMovement verifies the full pivot move flow: download with rootA, +// cancel+restart with rootB, catch-up applies BAL diffs, download resumes +// and completes against the new state. +func TestPivotMovement(t *testing.T) { + t.Parallel() + testPivotMovement(t, rawdb.HashScheme, 1) +} + +// TestPivotMovementRepeated verifies that multiple pivot moves work correctly. +func TestPivotMovementRepeated(t *testing.T) { + t.Parallel() + testPivotMovement(t, rawdb.HashScheme, 2) +} + +func testPivotMovement(t *testing.T, scheme string, pivotMoves int) { + // Use makeAccountTrieWithAddresses so trie keys are keccak(addr), + // matching what applyAccessList writes to the snapshot DB. + nodeScheme, sourceAccountTrie, elems, addrs := makeAccountTrieWithAddresses(100, scheme) + numA := uint64(100) + + // Target account 50 for BAL changes + targetAddr := addrs[49] + targetHash := crypto.Keccak256Hash(targetAddr[:]) + + type pivotMove struct { + blockNum uint64 + trie *trie.Trie + elems []*kv + root common.Hash + bals map[common.Hash]rlp.RawValue // header hash -> encoded BAL + balance *uint256.Int + } + + // Build each pivot move: update account 50's balance in both the trie + // and a BAL, write the header, and record everything. + db := rawdb.NewMemoryDatabase() + currentElems := elems + moves := make([]pivotMove, pivotMoves) + emptyHash := common.Hash{} + zero := uint64(0) + for m := 0; m < pivotMoves; m++ { + blockNum := numA + uint64(m) + 1 + balance := uint256.NewInt(uint64(1000 * (m + 1))) + + // Build updated trie with new balance for account 50 + trieDB := triedb.NewDatabase(rawdb.NewMemoryDatabase(), newDbConfig(scheme)) + newTrie := trie.NewEmpty(trieDB) + newElems := make([]*kv, len(currentElems)) + for i, entry := range currentElems { + if bytes.Equal(entry.k, targetHash[:]) { + val, _ := rlp.EncodeToBytes(&types.StateAccount{ + Nonce: 50, Balance: balance, + Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash[:], + }) + newElems[i] = &kv{entry.k, val} + } else { + newElems[i] = entry + } + newTrie.MustUpdate(newElems[i].k, newElems[i].v) + } + newRoot, nodes := newTrie.Commit(false) + trieDB.Update(newRoot, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), triedb.NewStateSet()) + resultTrie, _ := trie.New(trie.StateTrieID(newRoot), trieDB) + + // Build BAL matching the trie diff + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, targetAddr, balance) + var buf bytes.Buffer + if err := cb.EncodeRLP(&buf); err != nil { + t.Fatal(err) + } + + // Compute BAL hash, write header, store BAL keyed by header hash + var b bal.BlockAccessList + if err := rlp.DecodeBytes(buf.Bytes(), &b); err != nil { + t.Fatal(err) + } + balHash := b.Hash() + header := &types.Header{ + Number: new(big.Int).SetUint64(blockNum), Difficulty: common.Big0, + BaseFee: common.Big0, WithdrawalsHash: &emptyHash, + BlobGasUsed: &zero, ExcessBlobGas: &zero, + ParentBeaconRoot: &emptyHash, RequestsHash: &emptyHash, + BlockAccessListHash: &balHash, + } + rawdb.WriteHeader(db, header) + headerHash := header.Hash() + rawdb.WriteCanonicalHash(db, headerHash, blockNum) + moves[m] = pivotMove{ + blockNum: blockNum, + trie: resultTrie, + elems: newElems, + root: newRoot, + bals: map[common.Hash]rlp.RawValue{headerHash: buf.Bytes()}, + balance: balance, + } + currentElems = newElems + } + + // First run: download against rootA, cancel after 2 responses + rootA := sourceAccountTrie.Hash() + var ( + once1 sync.Once + cancel1 = make(chan struct{}) + term1 = func() { once1.Do(func() { close(cancel1) }) } + responses atomic.Int32 + ) + syncer1 := NewSyncer(db, nodeScheme) + src1 := newTestPeer("source1", t, term1) + src1.accountTrie = sourceAccountTrie.Copy() + src1.accountValues = elems + src1.accountRequestHandler = func(tp *testPeer, id uint64, root common.Hash, origin common.Hash, limit common.Hash, cap int) error { + if responses.Add(1) > 2 { + term1() + return nil + } + return defaultAccountRequestHandler(tp, id, root, origin, limit, cap) + } + syncer1.Register(src1) + src1.remote = syncer1 + syncer1.Sync(rootA, numA, cancel1) + + // Subsequent runs: each move triggers catch-up then resumes download + for i, move := range moves { + var ( + once sync.Once + cancel = make(chan struct{}) + term = func() { once.Do(func() { close(cancel) }) } + ) + syncer := NewSyncer(db, nodeScheme) + src := newTestPeer(fmt.Sprintf("source-%d", i+2), t, term) + src.accountTrie = move.trie.Copy() + src.accountValues = move.elems + src.accessLists = move.bals + syncer.Register(src) + src.remote = syncer + if err := syncer.Sync(move.root, move.blockNum, cancel); err != nil { + t.Fatalf("pivot move %d: sync failed: %v", i+1, err) + } + + // Verify account 50's balance was updated by catch-up + data := rawdb.ReadAccountSnapshot(db, targetHash) + if len(data) == 0 { + t.Fatalf("pivot move %d: account 50 not found after sync", i+1) + } + account, aErr := types.FullAccount(data) + if aErr != nil { + t.Fatalf("pivot move %d: failed to decode account: %v", i+1, aErr) + } + if account.Balance.Cmp(move.balance) != 0 { + t.Errorf("pivot move %d: balance wrong: got %v, want %v", i+1, account.Balance, move.balance) + } + } +} + +// TestSyncStatusClearedAfterCompletion verifies that the persisted sync status +// is cleared after a full sync completes (download + trie rebuild), so the +// next Sync() call starts fresh. +func TestSyncStatusClearedAfterCompletion(t *testing.T) { + t.Parallel() + testSyncStatusClearedAfterCompletion(t, rawdb.HashScheme) + testSyncStatusClearedAfterCompletion(t, rawdb.PathScheme) +} + +func testSyncStatusClearedAfterCompletion(t *testing.T, scheme string) { + var ( + once sync.Once + cancel = make(chan struct{}) + term = func() { once.Do(func() { close(cancel) }) } + ) + nodeScheme, sourceAccountTrie, elems := makeAccountTrieNoStorage(100, scheme) + + mkSource := func(name string) *testPeer { + source := newTestPeer(name, t, term) + source.accountTrie = sourceAccountTrie.Copy() + source.accountValues = elems + return source + } + syncer := setupSyncer(nodeScheme, mkSource("source")) + if err := syncer.Sync(sourceAccountTrie.Hash(), 0, cancel); err != nil { + t.Fatalf("sync failed: %v", err) + } + // After successful sync, status should be cleared + if status := rawdb.ReadSnapshotSyncStatus(syncer.db); status != nil { + t.Fatal("sync status should be nil after successful completion") + } +} + +// TestInterruptedRebuildRecovery verifies that if sync is interrupted after +// download completes but before trie rebuild finishes, the next Sync() call +// re-runs the download (which completes immediately) and rebuild. +func TestInterruptedRebuildRecovery(t *testing.T) { + t.Parallel() + + nodeScheme, sourceAccountTrie, elems := makeAccountTrieNoStorage(100, rawdb.HashScheme) + root := sourceAccountTrie.Hash() + + // First run: complete download, save status, simulate interruption + // before rebuild by calling download() directly + var ( + once1 sync.Once + cancel1 = make(chan struct{}) + term1 = func() { once1.Do(func() { close(cancel1) }) } + ) + db := rawdb.NewMemoryDatabase() + syncer1 := NewSyncer(db, nodeScheme) + src1 := newTestPeer("source1", t, term1) + src1.accountTrie = sourceAccountTrie.Copy() + src1.accountValues = elems + syncer1.Register(src1) + src1.remote = syncer1 + syncer1.root = root + syncer1.previousRoot = root + syncer1.loadSyncStatus() + + if err := syncer1.download(cancel1); err != nil { + t.Fatalf("download failed: %v", err) + } + // Save status (simulating what Sync's defer does) + for _, task := range syncer1.tasks { + syncer1.forwardAccountTask(task) + } + syncer1.cleanAccountTasks() + syncer1.saveSyncStatus() + + // Status should exist (rebuild hasn't run yet) + if rawdb.ReadSnapshotSyncStatus(db) == nil { + t.Fatal("sync status should exist after download") + } + // Second run: full Sync should detect tasks are done, run rebuild + var ( + once2 sync.Once + cancel2 = make(chan struct{}) + term2 = func() { once2.Do(func() { close(cancel2) }) } + ) + syncer2 := NewSyncer(db, nodeScheme) + src2 := newTestPeer("source2", t, term2) + src2.accountTrie = sourceAccountTrie.Copy() + src2.accountValues = elems + syncer2.Register(src2) + src2.remote = syncer2 + + if err := syncer2.Sync(root, 0, cancel2); err != nil { + t.Fatalf("resumed sync failed: %v", err) + } + // After rebuild completes, status should be cleared + if status := rawdb.ReadSnapshotSyncStatus(db); status != nil { + t.Fatal("sync status should be nil after rebuild completes") + } +} + +// TestFetchAccessListsSinglePeer verifies fetching BALs from a single peer. +func TestFetchAccessListsSinglePeer(t *testing.T) { + t.Parallel() + var ( + once sync.Once + cancel = make(chan struct{}) + term = func() { once.Do(func() { close(cancel) }) } + ) + + // Create test BALs + hashes := []common.Hash{ + common.HexToHash("0x01"), + common.HexToHash("0x02"), + common.HexToHash("0x03"), + } + bals := make(map[common.Hash]rlp.RawValue) + for _, h := range hashes { + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, common.HexToAddress("0xaa"), uint256.NewInt(uint64(h[31]))) + var buf bytes.Buffer + if err := cb.EncodeRLP(&buf); err != nil { + t.Fatal(err) + } + bals[h] = buf.Bytes() + } + source := newTestPeer("source", t, term) + source.accessLists = bals + syncer := setupSyncer(rawdb.HashScheme, source) + results, err := syncer.fetchAccessLists(hashes, cancel) + if err != nil { + t.Fatalf("fetchAccessLists failed: %v", err) + } + if len(results) != len(hashes) { + t.Fatalf("result count mismatch: got %d, want %d", len(results), len(hashes)) + } + + // Verify results match input order + for i, h := range hashes { + if !bytes.Equal(results[i], bals[h]) { + t.Errorf("result %d mismatch", i) + } + } +} + +// TestFetchAccessListsMultiplePeers verifies that fetch distributes work +// across multiple idle peers. +func TestFetchAccessListsMultiplePeers(t *testing.T) { + t.Parallel() + var ( + once sync.Once + cancel = make(chan struct{}) + term = func() { once.Do(func() { close(cancel) }) } + ) + + // Create enough BALs to potentially split across peers + var hashes []common.Hash + bals := make(map[common.Hash]rlp.RawValue) + for i := 0; i < 10; i++ { + h := common.HexToHash(fmt.Sprintf("0x%02x", i+1)) + hashes = append(hashes, h) + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, common.HexToAddress("0xaa"), uint256.NewInt(uint64(i))) + var buf bytes.Buffer + if err := cb.EncodeRLP(&buf); err != nil { + t.Fatal(err) + } + bals[h] = buf.Bytes() + } + mkSource := func(name string) *testPeer { + source := newTestPeer(name, t, term) + source.accessLists = bals + return source + } + syncer := setupSyncer(rawdb.HashScheme, mkSource("peer-a"), mkSource("peer-b"), mkSource("peer-c")) + results, err := syncer.fetchAccessLists(hashes, cancel) + if err != nil { + t.Fatalf("fetchAccessLists failed: %v", err) + } + if len(results) != len(hashes) { + t.Fatalf("result count mismatch: got %d, want %d", len(results), len(hashes)) + } +} + +// TestFetchAccessListsPeerTimeout verifies that timed-out requests are retried +// with a different peer. +func TestFetchAccessListsPeerTimeout(t *testing.T) { + t.Parallel() + var ( + once sync.Once + cancel = make(chan struct{}) + term = func() { once.Do(func() { close(cancel) }) } + ) + hashes := []common.Hash{common.HexToHash("0x01")} + bals := make(map[common.Hash]rlp.RawValue) + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, common.HexToAddress("0xaa"), uint256.NewInt(42)) + var buf bytes.Buffer + if err := cb.EncodeRLP(&buf); err != nil { + t.Fatal(err) + } + bals[hashes[0]] = buf.Bytes() + + // First peer never responds + nonResponsive := newTestPeer("non-responsive", t, term) + nonResponsive.accessListRequestHandler = func(t *testPeer, id uint64, hashes []common.Hash, max int) error { + // Don't respond — let it time out + return nil + } + + // Second peer serves correctly + good := newTestPeer("good", t, term) + good.accessLists = bals + syncer := setupSyncer(rawdb.HashScheme, nonResponsive, good) + syncer.rates.OverrideTTLLimit = time.Millisecond // Fast timeout + results, err := syncer.fetchAccessLists(hashes, cancel) + if err != nil { + t.Fatalf("fetchAccessLists failed: %v", err) + } + if len(results) != 1 { + t.Fatalf("result count mismatch: got %d, want 1", len(results)) + } +} + +// TestFetchAccessListsPeerRejection verifies that peers returning empty +// responses are marked stateless and work is retried with another peer. +func TestFetchAccessListsPeerRejection(t *testing.T) { + t.Parallel() + var ( + once sync.Once + cancel = make(chan struct{}) + term = func() { once.Do(func() { close(cancel) }) } + ) + hashes := []common.Hash{common.HexToHash("0x01")} + bals := make(map[common.Hash]rlp.RawValue) + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, common.HexToAddress("0xaa"), uint256.NewInt(42)) + var buf bytes.Buffer + if err := cb.EncodeRLP(&buf); err != nil { + t.Fatal(err) + } + bals[hashes[0]] = buf.Bytes() + + // First peer rejects (has no BAL data, returns empty) + // accessLists is nil, so defaultAccessListRequestHandler returns empty + rejector := newTestPeer("rejector", t, term) + + // Second peer serves correctly + good := newTestPeer("good", t, term) + good.accessLists = bals + syncer := setupSyncer(rawdb.HashScheme, rejector, good) + results, err := syncer.fetchAccessLists(hashes, cancel) + if err != nil { + t.Fatalf("fetchAccessLists failed: %v", err) + } + if len(results) != 1 { + t.Fatalf("result count mismatch: got %d, want 1", len(results)) + } +} + +// TestFetchAccessListsCancel verifies that fetchAccessLists returns promptly +// when cancelled. +func TestFetchAccessListsCancel(t *testing.T) { + t.Parallel() + cancel := make(chan struct{}) + + // Peer that never responds + nonResponsive := newTestPeer("non-responsive", t, func() {}) + nonResponsive.accessListRequestHandler = func(t *testPeer, id uint64, hashes []common.Hash, max int) error { + return nil // never deliver + } + syncer := setupSyncer(rawdb.HashScheme, nonResponsive) + hashes := []common.Hash{common.HexToHash("0x01")} + + // Cancel after a short delay + go func() { + time.Sleep(50 * time.Millisecond) + close(cancel) + }() + _, err := syncer.fetchAccessLists(hashes, cancel) + if err != ErrCancelled { + t.Fatalf("expected ErrCancelled, got %v", err) + } +} + +// TestFetchAccessListsPeerDrop verifies that dropping a peer mid-request +// causes the request to be retried with a different peer. +func TestFetchAccessListsPeerDrop(t *testing.T) { + t.Parallel() + var ( + once sync.Once + cancel = make(chan struct{}) + term = func() { once.Do(func() { close(cancel) }) } + ) + hashes := []common.Hash{common.HexToHash("0x01")} + bals := make(map[common.Hash]rlp.RawValue) + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, common.HexToAddress("0xaa"), uint256.NewInt(42)) + var buf bytes.Buffer + if err := cb.EncodeRLP(&buf); err != nil { + t.Fatal(err) + } + bals[hashes[0]] = buf.Bytes() + + // First peer will be dropped mid-request + dropped := newTestPeer("dropped", t, term) + dropped.accessListRequestHandler = func(tp *testPeer, id uint64, hashes []common.Hash, max int) error { + // Simulate peer dropping by unregistering + tp.remote.Unregister(tp.id) + return nil + } + + // Second peer serves correctly + good := newTestPeer("good", t, term) + good.accessLists = bals + syncer := setupSyncer(rawdb.HashScheme, dropped, good) + results, err := syncer.fetchAccessLists(hashes, cancel) + if err != nil { + t.Fatalf("fetchAccessLists failed: %v", err) + } + if len(results) != 1 { + t.Fatalf("result count mismatch: got %d, want 1", len(results)) + } +} + +// TestFetchAccessListsShortResponse verifies that when a peer returns fewer +// BALs than requested (a short/partial response), the un-served hashes are +// retried and eventually all results are collected. +func TestFetchAccessListsShortResponse(t *testing.T) { + t.Parallel() + var ( + once sync.Once + cancel = make(chan struct{}) + term = func() { once.Do(func() { close(cancel) }) } + ) + + // Request 4 hashes but the peer only returns the first 2. + hashes := []common.Hash{ + common.HexToHash("0x01"), + common.HexToHash("0x02"), + common.HexToHash("0x03"), + common.HexToHash("0x04"), + } + allBALs := make(map[common.Hash]rlp.RawValue) + for _, h := range hashes { + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, common.HexToAddress("0xaa"), uint256.NewInt(uint64(h[31]))) + var buf bytes.Buffer + if err := cb.EncodeRLP(&buf); err != nil { + t.Fatal(err) + } + allBALs[h] = buf.Bytes() + } + + // shortPeer returns only the first 2 BALs regardless of how many are + // requested. This simulates a peer that truncates its response (e.g., + // hitting the 2 MiB response soft limit). + shortPeer := newTestPeer("short", t, term) + shortPeer.accessListRequestHandler = func(tp *testPeer, id uint64, reqHashes []common.Hash, max int) error { + // Return only the first 2 of however many were requested. + limit := 2 + if len(reqHashes) < limit { + limit = len(reqHashes) + } + var results []rlp.RawValue + for i := 0; i < limit; i++ { + results = append(results, allBALs[reqHashes[i]]) + } + rawList, _ := rlp.EncodeToRawList(results) + if err := tp.remote.OnAccessLists(tp, id, rawList); err != nil { + tp.test.Errorf("delivery rejected: %v", err) + tp.term() + } + return nil + } + syncer := setupSyncer(rawdb.HashScheme, shortPeer) + + // Pre-seed the rate tracker so the peer's capacity for AccessListsMsg is + // high enough to get all 4 hashes assigned in a single request. Without + // this, the default capacity is 1, so the peer would only get 1 hash per + // round and the short-response scenario never triggers. + syncer.rates.Update(shortPeer.id, AccessListsMsg, time.Millisecond, 100) + + // If the bug exists, this will hang. + done := make(chan struct{}) + var ( + results []rlp.RawValue + fetchErr error + ) + go func() { + results, fetchErr = syncer.fetchAccessLists(hashes, cancel) + close(done) + }() + + select { + case <-done: + // fetchAccessLists returned + case <-time.After(5 * time.Second): + t.Fatal("fetchAccessLists has hung. This means unserved hashes were never re-added to pending.") + } + if fetchErr != nil { + t.Fatalf("fetchAccessLists failed: %v", fetchErr) + } + if len(results) != len(hashes) { + t.Fatalf("result count mismatch: got %d, want %d", len(results), len(hashes)) + } + + // Verify all results are non-nil and in correct order + for i, h := range hashes { + if results[i] == nil { + t.Errorf("result %d (hash %v) is nil", i, h) + } + } +} + func newDbConfig(scheme string) *triedb.Config { if scheme == rawdb.HashScheme { return &triedb.Config{}