diff --git a/cmd/devp2p/internal/ethtest/conn.go b/cmd/devp2p/internal/ethtest/conn.go index 02579f8b55..fb655eba18 100644 --- a/cmd/devp2p/internal/ethtest/conn.go +++ b/cmd/devp2p/internal/ethtest/conn.go @@ -84,6 +84,19 @@ func (s *Suite) dialSnap() (*Conn, error) { return conn, nil } +// dialSnap2 creates a connection advertising snap/2 as the only snap capability. +// This is used by the snap/2 (EIP-8189) test suite to force the peer to +// negotiate snap/2 rather than falling back to snap/1. +func (s *Suite) dialSnap2() (*Conn, error) { + conn, err := s.dial() + if err != nil { + return nil, fmt.Errorf("dial failed: %v", err) + } + conn.caps = append(conn.caps, p2p.Cap{Name: "snap", Version: 2}) + conn.ourHighestSnapProtoVersion = 2 + return conn, nil +} + // Conn represents an individual connection with a peer type Conn struct { *rlpx.Conn @@ -183,7 +196,10 @@ func (c *Conn) ReadEth() (any, error) { } } -// ReadSnap reads a snap/1 response with the given id from the connection. +// ReadSnap reads a snap protocol response from the connection. It decodes +// the full message catalog of both snap/1 and snap/2. The caller is +// expected to only receive codes that were actually valid on the +// negotiated protocol version. 
func (c *Conn) ReadSnap() (any, error) { c.SetReadDeadline(time.Now().Add(timeout)) for { @@ -215,6 +231,10 @@ func (c *Conn) ReadSnap() (any, error) { msg = new(snap.GetTrieNodesPacket) case snap.TrieNodesMsg: msg = new(snap.TrieNodesPacket) + case snap.GetAccessListsMsg: + msg = new(snap.GetAccessListsPacket) + case snap.AccessListsMsg: + msg = new(snap.AccessListsPacket) default: panic(fmt.Errorf("unhandled snap code: %d", code)) } diff --git a/cmd/devp2p/internal/ethtest/protocol.go b/cmd/devp2p/internal/ethtest/protocol.go index a21d1ca7a1..f865869093 100644 --- a/cmd/devp2p/internal/ethtest/protocol.go +++ b/cmd/devp2p/internal/ethtest/protocol.go @@ -33,7 +33,11 @@ const ( const ( baseProtoLen = 16 ethProtoLen = 18 - snapProtoLen = 8 + // snapProtoLen accommodates snap/2 (EIP-8189) which extends snap/1 with two + // additional message codes (GetBlockAccessLists=0x08, BlockAccessLists=0x09). + // Using 10 is safe for snap/1 connections because the extra codes are simply + // never used on that protocol version. + snapProtoLen = 10 ) // Unexported handshake structure from p2p/peer.go. diff --git a/cmd/devp2p/internal/ethtest/snap2.go b/cmd/devp2p/internal/ethtest/snap2.go new file mode 100644 index 0000000000..1124ca8832 --- /dev/null +++ b/cmd/devp2p/internal/ethtest/snap2.go @@ -0,0 +1,375 @@ +// Copyright 2026 The go-ethereum Authors +// This file is part of go-ethereum. +// +// go-ethereum is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-ethereum is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with go-ethereum. If not, see <http://www.gnu.org/licenses/>. + +package ethtest + +import ( + "bytes" + "fmt" + "math/rand" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/core/types/bal" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/eth/protocols/snap" + "github.com/ethereum/go-ethereum/internal/utesting" + "github.com/ethereum/go-ethereum/rlp" +) + +// Snap/2 (EIP-8189) replaces trie node healing with BAL-based state catch-up. +// It keeps 0x00..0x05 (AccountRange/StorageRanges/ByteCodes) unchanged, removes +// GetTrieNodes (0x06) / TrieNodes (0x07), and adds GetBlockAccessLists (0x08) / +// BlockAccessLists (0x09). +// +// The tests in this file focus on the wire behavior that is new or changed in +// snap/2. Tests for the unchanged messages are already covered by the snap/1 +// suite in snap.go; the harness reuses the same code paths because those +// message formats are identical across versions. + +// TestSnap2Status performs an RLPx+eth+snap/2 handshake against the node, +// verifying that the node advertises and negotiates snap/2. +func (s *Suite) TestSnap2Status(t *utesting.T) { + t.Log(`This test performs a snap/2 (EIP-8189) handshake. The peer is expected to +advertise snap/2 as a p2p capability and accept the connection.`) + + conn, err := s.dialSnap2() + if err != nil { + t.Fatalf("dial failed: %v", err) + } + defer conn.Close() + if err := conn.peer(s.chain, nil); err != nil { + t.Fatalf("peering failed: %v", err) + } + if conn.negotiatedSnapProtoVersion != 2 { + t.Fatalf("unexpected negotiated snap version: got %d, want 2", conn.negotiatedSnapProtoVersion) + } +} + +type accessListsTest struct { + nBytes uint64 + hashes []common.Hash + + // minEntries/maxEntries bound the number of entries the response list + // MUST contain. 
Per EIP-8189 the server may truncate from the tail when + // the byte soft limit is reached, but MUST preserve request order. + minEntries int + maxEntries int + + desc string +} + +// TestSnap2GetBlockAccessLists exercises various forms of GetBlockAccessLists +// requests defined in EIP-8189. Per the spec: +// +// - Nodes MUST always respond. +// - Unavailable BALs are returned as the RLP empty string (0x80) at the +// matching position. +// - The server MAY return fewer entries than requested (respecting the byte +// soft limit or QoS limits), truncating from the tail. +// - Returned entries MUST preserve request order. +// - When a BAL is returned, its keccak256(rlp.encode(bal)) MUST match the +// block-access-list-hash field of the corresponding block header. +func (s *Suite) TestSnap2GetBlockAccessLists(t *utesting.T) { + var ( + head = s.chain.Head() + headHash = head.Hash() + preHash = s.chain.blocks[s.chain.Len()-2].Hash() + unknown = common.HexToHash("0xdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef") + ) + + // Collect a window of recent canonical block hashes. Limit to at most 16 + // entries to keep the request small and well under any reasonable limit. + var recent []common.Hash + start := s.chain.Len() - 16 + if start < 1 { + start = 1 + } + for i := start; i < s.chain.Len(); i++ { + recent = append(recent, s.chain.blocks[i].Hash()) + } + + tests := []accessListsTest{ + { + desc: `An empty request. The server must respond with an empty list and must +not disconnect.`, + nBytes: softResponseLimitSnap, + hashes: nil, + minEntries: 0, + maxEntries: 0, + }, + { + desc: `A request for a single random/unknown block hash. Per the spec the +server must respond and include an RLP empty string (0x80) at that position.`, + nBytes: softResponseLimitSnap, + hashes: []common.Hash{unknown}, + minEntries: 1, + maxEntries: 1, + }, + { + desc: `A request for multiple random/unknown block hashes. 
The server must +preserve request order and return an RLP empty string for each position.`, + nBytes: softResponseLimitSnap, + hashes: []common.Hash{ + unknown, + common.HexToHash("0x0000000000000000000000000000000000000000000000000000000000000001"), + common.HexToHash("0x0000000000000000000000000000000000000000000000000000000000000002"), + }, + minEntries: 3, + maxEntries: 3, + }, + { + desc: `A request for the chain head. The server must respond. If the node is +post-Amsterdam and has the BAL for this block, the returned BAL must hash to +the block-access-list-hash in the header. Otherwise an empty entry is valid.`, + nBytes: softResponseLimitSnap, + hashes: []common.Hash{headHash}, + minEntries: 1, + maxEntries: 1, + }, + { + desc: `A request for the chain head and its parent. The server must return +exactly two entries, in request order.`, + nBytes: softResponseLimitSnap, + hashes: []common.Hash{headHash, preHash}, + minEntries: 2, + maxEntries: 2, + }, + { + desc: `A mixed request with known and unknown hashes. The server must +return entries in request order, with the RLP empty string at positions +corresponding to unknown hashes.`, + nBytes: softResponseLimitSnap, + hashes: []common.Hash{headHash, unknown, preHash, unknown}, + // We expect exactly 4 entries — mixed responses are small and well + // under the byte limit, so truncation is not expected. + minEntries: 4, + maxEntries: 4, + }, + { + desc: `A request spanning the most recent canonical window. Implementations +may serve or drop individual entries, but the entries that are returned must +preserve request order.`, + nBytes: softResponseLimitSnap, + hashes: recent, + minEntries: 0, + maxEntries: len(recent), + }, + { + desc: `A request with a very small byte soft limit. The server must return +at least zero entries and no more than the requested number, truncating from +the tail. 
It must not disconnect.`, + nBytes: 1, + hashes: recent, + minEntries: 0, + maxEntries: len(recent), + }, + { + desc: `A request with a zero byte soft limit. The server must still respond +(possibly with an empty list) and must not disconnect.`, + nBytes: 0, + hashes: recent, + minEntries: 0, + maxEntries: len(recent), + }, + { + desc: `A request containing the same hash repeated. The server must treat +each position independently and preserve request order.`, + nBytes: softResponseLimitSnap, + hashes: []common.Hash{headHash, headHash, headHash}, + minEntries: 3, + maxEntries: 3, + }, + } + + for i, tc := range tests { + if i > 0 { + t.Log("\n") + } + t.Logf("-- Test %d", i) + t.Log(tc.desc) + t.Log(" request:") + t.Logf(" hashes: %d", len(tc.hashes)) + t.Logf(" responseBytes: %d", tc.nBytes) + if err := s.snapGetAccessLists(t, &tc); err != nil { + t.Errorf("test %d failed: %v", i, err) + } + } +} + +// TestSnap2TrieNodesRemoved verifies that snap/2 no longer serves the +// GetTrieNodes message (0x06). Per EIP-8189, snap/2 removes GetTrieNodes and +// TrieNodes entirely. A server that negotiated snap/2 must not treat these +// codes as valid snap messages and should disconnect the peer that sends them. +func (s *Suite) TestSnap2TrieNodesRemoved(t *utesting.T) { + t.Log(`This test verifies that sending a GetTrieNodes message over a snap/2 +connection causes the peer to reject the request. Per EIP-8189, GetTrieNodes +is removed in snap/2.`) + + conn, err := s.dialSnap2() + if err != nil { + t.Fatalf("dial failed: %v", err) + } + defer conn.Close() + if err := conn.peer(s.chain, nil); err != nil { + t.Fatalf("peering failed: %v", err) + } + + // Build a syntactically valid GetTrieNodes request to the head state root. 
+ paths, err := rlp.EncodeToRawList([]snap.TrieNodePathSet{{[]byte{0}}}) + if err != nil { + t.Fatalf("failed to encode paths: %v", err) + } + req := &snap.GetTrieNodesPacket{ + ID: uint64(rand.Int63()), + Root: s.chain.Head().Root(), + Paths: paths, + Bytes: 5000, + } + if err := conn.Write(snapProto, snap.GetTrieNodesMsg, req); err != nil { + t.Fatalf("failed to write GetTrieNodes: %v", err) + } + + // We expect either a disconnect or a read error/timeout. We must NOT + // receive a valid TrieNodes response. Loop a few times to consume any + // incidental messages the peer might send (e.g. block updates) before + // deciding. + for i := 0; i < 5; i++ { + msg, err := conn.ReadSnap() + if err != nil { + // Disconnect or read error — the peer rejected the request. + return + } + if _, ok := msg.(*snap.TrieNodesPacket); ok { + t.Fatal("peer responded with TrieNodes over snap/2; GetTrieNodes must be unsupported") + } + } + t.Fatal("peer did not reject GetTrieNodes over snap/2 within the observation window") +} + +// softResponseLimitSnap mirrors the recommended 2 MiB soft limit for +// BlockAccessLists responses from EIP-8189 §"Response Size Limit". +const softResponseLimitSnap = 2 * 1024 * 1024 + +// snapGetAccessLists sends a GetBlockAccessLists request, validates the +// response structure against EIP-8189, and verifies BAL content against the +// block-access-list-hash field of the corresponding block header (when the +// block is known and a BAL was returned). 
+func (s *Suite) snapGetAccessLists(t *utesting.T, tc *accessListsTest) error { + conn, err := s.dialSnap2() + if err != nil { + return fmt.Errorf("dial failed: %v", err) + } + defer conn.Close() + if err = conn.peer(s.chain, nil); err != nil { + return fmt.Errorf("peering failed: %v", err) + } + + req := &snap.GetAccessListsPacket{ + ID: uint64(rand.Int63()), + Hashes: tc.hashes, + Bytes: tc.nBytes, + } + msg, err := conn.snapRequest(snap.GetAccessListsMsg, req) + if err != nil { + return fmt.Errorf("access list request failed: %v", err) + } + res, ok := msg.(*snap.AccessListsPacket) + if !ok { + return fmt.Errorf("unexpected response type: %T", msg) + } + if res.ID != req.ID { + return fmt.Errorf("request id mismatch: got %d, want %d", res.ID, req.ID) + } + + // Check list length bounds. + got := res.AccessLists.Len() + if got < tc.minEntries || got > tc.maxEntries { + return fmt.Errorf("response has %d entries, want between %d and %d", got, tc.minEntries, tc.maxEntries) + } + + // Build a map of request-index -> block so we can verify BAL hashes. + blocks := make(map[int]*types.Block) + for i, h := range tc.hashes { + for _, b := range s.chain.blocks { + if b.Hash() == h { + blocks[i] = b + break + } + } + } + + // Iterate the response, validating each entry positionally. + var ( + idx int + it = res.AccessLists.ContentIterator() + ) + for it.Next() { + raw := it.Value() + block := blocks[idx] + + // Empty entry: per spec, indicates BAL is unavailable for that block. + if bytes.Equal(raw, rlp.EmptyString) { + if block != nil && block.Header().BlockAccessListHash != nil { + // Not a failure — the server is allowed to legitimately not + // have the BAL. But we log it so the test output is diagnosable. + t.Logf(" entry %d: server returned empty for known post-Amsterdam block %x", idx, tc.hashes[idx]) + } + idx++ + continue + } + + // Non-empty entry. 
A BAL is only legitimate for a block we know + // locally whose header commits to one; for any other hash the only + // valid response is the RLP empty string, so receiving data here + // means the server fabricated it. + if block == nil { + return fmt.Errorf("entry %d: server returned BAL data for unknown hash %x", idx, tc.hashes[idx]) + } + if block.Header().BlockAccessListHash == nil { + return fmt.Errorf("entry %d: server returned BAL data for a block with no expected BAL (hash %x)", idx, tc.hashes[idx]) + } + + // Per EIP-8189: compute keccak256(rlp.encode(bal)) against the raw + // bytes actually received on the wire, and compare to the header + // commitment. Hashing raw bytes (rather than re-encoding after a + // decode round-trip) catches peers that send non-canonical BAL + // encodings. + have := crypto.Keccak256Hash(raw) + want := *block.Header().BlockAccessListHash + if have != want { + return fmt.Errorf("entry %d: BAL hash mismatch: have %x, want %x", idx, have, want) + } + + // Decode and validate the BAL's internal structure: ordering of + // accounts/slots/changes, code-size limits, and per-entry access-index + // bounds, against the known block. + var accessList bal.BlockAccessList + if err := rlp.DecodeBytes(raw, &accessList); err != nil { + return fmt.Errorf("entry %d: invalid BAL RLP: %v", idx, err) + } + if err := accessList.Validate(block.GasLimit(), len(block.Transactions())); err != nil { + return fmt.Errorf("entry %d: BAL failed validation: %v", idx, err) + } + idx++ + } + + // Sanity: iterator consumed exactly the reported number of entries. 
+ if idx != got { + return fmt.Errorf("iterator visited %d entries, expected %d", idx, got) + } + return nil +} diff --git a/cmd/devp2p/internal/ethtest/suite.go b/cmd/devp2p/internal/ethtest/suite.go index 20a03529ea..eff573ecb1 100644 --- a/cmd/devp2p/internal/ethtest/suite.go +++ b/cmd/devp2p/internal/ethtest/suite.go @@ -106,6 +106,16 @@ func (s *Suite) SnapTests() []utesting.Test { } } +// Snap2Tests returns the list of tests for the snap/2 protocol (EIP-8189). +// These tests require the peer to advertise and negotiate snap/2. +func (s *Suite) Snap2Tests() []utesting.Test { + return []utesting.Test{ + {Name: "Status", Fn: s.TestSnap2Status}, + {Name: "GetBlockAccessLists", Fn: s.TestSnap2GetBlockAccessLists}, + {Name: "TrieNodesRemoved", Fn: s.TestSnap2TrieNodesRemoved}, + } +} + func (s *Suite) TestStatus(t *utesting.T) { t.Log(`This test is just a sanity check. It performs an eth protocol handshake.`) conn, err := s.dialAndPeer(nil) diff --git a/cmd/devp2p/internal/ethtest/suite_test.go b/cmd/devp2p/internal/ethtest/suite_test.go index a6fca0e524..3c67ad3ef1 100644 --- a/cmd/devp2p/internal/ethtest/suite_test.go +++ b/cmd/devp2p/internal/ethtest/suite_test.go @@ -99,6 +99,31 @@ func TestSnapSuite(t *testing.T) { } } +func TestSnap2Suite(t *testing.T) { + jwtPath, secret, err := makeJWTSecret(t) + if err != nil { + t.Fatalf("could not make jwt secret: %v", err) + } + geth, err := runGeth("./testdata", jwtPath) + if err != nil { + t.Fatalf("could not run geth: %v", err) + } + defer geth.Close() + + suite, err := NewSuite(geth.Server().Self(), "./testdata", geth.HTTPAuthEndpoint(), common.Bytes2Hex(secret[:])) + if err != nil { + t.Fatalf("could not create new test suite: %v", err) + } + for _, test := range suite.Snap2Tests() { + t.Run(test.Name, func(t *testing.T) { + result := utesting.RunTests([]utesting.Test{{Name: test.Name, Fn: test.Fn}}, os.Stdout) + if result[0].Failed { + t.Fatal() + } + }) + } +} + // runGeth creates and starts a geth node func 
runGeth(dir string, jwtPath string) (*node.Node, error) { stack, err := node.New(&node.Config{ @@ -141,6 +166,7 @@ func setupGeth(stack *node.Node, dir string) error { TrieDirtyCache: 16, TrieTimeout: 60 * time.Minute, SnapshotCache: 10, + SnapV2: true, // advertise snap/2 (alongside snap/1) so the snap/2 suite can negotiate it }) if err != nil { return err diff --git a/cmd/devp2p/rlpxcmd.go b/cmd/devp2p/rlpxcmd.go index ec73171e76..a08fe707ba 100644 --- a/cmd/devp2p/rlpxcmd.go +++ b/cmd/devp2p/rlpxcmd.go @@ -64,6 +64,7 @@ var ( rlpxPingCommand, rlpxEthTestCommand, rlpxSnapTestCommand, + rlpxSnap2TestCommand, }, } rlpxPingCommand = &cli.Command{ @@ -99,6 +100,20 @@ var ( testNodeEngineFlag, }, } + rlpxSnap2TestCommand = &cli.Command{ + Name: "snap2-test", + Usage: "Runs snap/2 (EIP-8189) protocol tests against a node", + ArgsUsage: "", + Action: rlpxSnap2Test, + Flags: []cli.Flag{ + testPatternFlag, + testTAPFlag, + testChainDirFlag, + testNodeFlag, + testNodeJWTFlag, + testNodeEngineFlag, + }, + } ) func rlpxPing(ctx *cli.Context) error { @@ -164,6 +179,16 @@ func rlpxSnapTest(ctx *cli.Context) error { return runTests(ctx, suite.SnapTests()) } +// rlpxSnap2Test runs the snap/2 (EIP-8189) protocol test suite. 
+func rlpxSnap2Test(ctx *cli.Context) error { + p := cliTestParams(ctx) + suite, err := ethtest.NewSuite(p.node, p.chainDir, p.engineAPI, p.jwt) + if err != nil { + exit(err) + } + return runTests(ctx, suite.Snap2Tests()) +} + type testParams struct { node *enode.Node engineAPI string diff --git a/cmd/geth/main.go b/cmd/geth/main.go index e547256e00..5e90164aaa 100644 --- a/cmd/geth/main.go +++ b/cmd/geth/main.go @@ -204,6 +204,7 @@ var ( utils.MetricsInfluxDBBucketFlag, utils.MetricsInfluxDBOrganizationFlag, utils.StateSizeTrackingFlag, + utils.SnapV2Flag, } ) diff --git a/cmd/utils/flags.go b/cmd/utils/flags.go index d7f09d9cfd..6bb54dc780 100644 --- a/cmd/utils/flags.go +++ b/cmd/utils/flags.go @@ -297,6 +297,12 @@ var ( Value: ethconfig.Defaults.EnableStateSizeTracking, Category: flags.StateCategory, } + SnapV2Flag = &cli.BoolFlag{ + Name: "snap.v2", + Usage: "Enable the experimental snap/2 (EIP-8189, BAL-based) sync protocol (advertises and syncs via snap/2; not safe on public networks)", + Value: ethconfig.Defaults.SnapV2, + Category: flags.StateCategory, + } BinTrieGroupDepthFlag = &cli.IntFlag{ Name: "bintrie.groupdepth", Usage: "Number of levels per serialized group in binary trie (1-8, default 5). Lower values create smaller groups with more nodes.", @@ -1905,6 +1911,9 @@ func SetEthConfig(ctx *cli.Context, stack *node.Node, cfg *ethconfig.Config) { if ctx.IsSet(StateSizeTrackingFlag.Name) { cfg.EnableStateSizeTracking = ctx.Bool(StateSizeTrackingFlag.Name) } + if ctx.IsSet(SnapV2Flag.Name) { + cfg.SnapV2 = ctx.Bool(SnapV2Flag.Name) + } // Override any default configs for hard coded networks. switch { case ctx.Bool(MainnetFlag.Name): diff --git a/core/blockchain.go b/core/blockchain.go index 166b58b05b..c914a6dd81 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -1164,7 +1164,7 @@ func (bc *BlockChain) SnapSyncStart() error { // given hash, regardless of the chain contents prior to snap sync. 
It is // invoked once snap sync completes and assumes that SnapSyncStart was called // previously. -func (bc *BlockChain) SnapSyncComplete(hash common.Hash) error { +func (bc *BlockChain) SnapSyncComplete(hash common.Hash, isSnapV2 bool) error { // Make sure that both the block as well at its state trie exists block := bc.GetBlockByHash(hash) if block == nil { @@ -1175,19 +1175,28 @@ func (bc *BlockChain) SnapSyncComplete(hash common.Hash) error { } defer bc.chainmu.Unlock() - // Reset the trie database with the fresh snap synced state. + // Reset the trie database with the fresh snap synced state. Snap/1 needs + // a full trie-to-flat regeneration; snap/2 adopts the already-consistent + // flat state and skips that work. root := block.Root() if bc.triedb.Scheme() == rawdb.PathScheme { - if err := bc.triedb.Enable(root); err != nil { - return err + if isSnapV2 { + if err := bc.triedb.AdoptSyncedState(root); err != nil { + return err + } + } else { + if err := bc.triedb.Enable(root); err != nil { + return err + } } } if !bc.HasState(root) { return fmt.Errorf("non existent state [%x..]", root[:4]) } - // Destroy any existing state snapshot and regenerate it in the background, - // also resuming the normal maintenance of any previously paused snapshot. - if bc.snaps != nil { + + // The legacy snapshot tree needs to be wiped and rebuilt from the trie + // after a snap/1 sync. + if !isSnapV2 && bc.snaps != nil { bc.snaps.Rebuild(root) } diff --git a/core/rawdb/accessors_snapshot.go b/core/rawdb/accessors_snapshot.go index 24259dbc70..4573e08321 100644 --- a/core/rawdb/accessors_snapshot.go +++ b/core/rawdb/accessors_snapshot.go @@ -250,3 +250,10 @@ func DeleteGenerateTriePartitionDone(db ethdb.KeyValueWriter, partition byte) { log.Crit("Failed to remove generate-trie done marker", "err", err) } } + +// DeleteSnapshotSyncStatus removes the serialized sync status from the database. 
+func DeleteSnapshotSyncStatus(db ethdb.KeyValueWriter) { + if err := db.Delete(snapshotSyncStatusKey); err != nil { + log.Crit("Failed to remove snapshot sync status", "err", err) + } +} diff --git a/eth/backend.go b/eth/backend.go index 2f10351b9c..5853697d64 100644 --- a/eth/backend.go +++ b/eth/backend.go @@ -352,6 +352,7 @@ func New(stack *node.Node, config *ethconfig.Config) (*Ethereum, error) { Sync: config.SyncMode, BloomCache: uint64(cacheLimit), RequiredBlocks: config.RequiredBlocks, + SnapV2: config.SnapV2, }); err != nil { return nil, err } @@ -448,7 +449,7 @@ func (s *Ethereum) ArchiveMode() bool { return s.config.NoPruni func (s *Ethereum) Protocols() []p2p.Protocol { protos := eth.MakeProtocols((*ethHandler)(s.handler), s.networkID, s.discmix) if s.config.SnapshotCache > 0 { - protos = append(protos, snap.MakeProtocols((*snapHandler)(s.handler))...) + protos = append(protos, snap.MakeProtocols((*snapHandler)(s.handler), s.config.SnapV2)...) } return protos } diff --git a/eth/downloader/downloader.go b/eth/downloader/downloader.go index 4a575d6856..e0a4ec6b6d 100644 --- a/eth/downloader/downloader.go +++ b/eth/downloader/downloader.go @@ -142,7 +142,7 @@ type Downloader struct { pivotHeader *types.Header // Pivot block header to dynamically push the syncing state root pivotLock sync.RWMutex // Lock protecting pivot header reads from updates - SnapSyncer *snap.Syncer // TODO(karalabe): make private! hack for now + snapSyncer snap.Syncer // snap/1 or snap/2 state syncer, selected at construction stateSyncStart chan *stateSync // Cancellation and termination @@ -201,7 +201,7 @@ type BlockChain interface { SnapSyncStart() error // SnapSyncComplete directly commits the head block to a certain entity. - SnapSyncComplete(common.Hash) error + SnapSyncComplete(hash common.Hash, isSnapV2 bool) error // InsertHeadersBeforeCutoff inserts a batch of headers before the configured // chain cutoff into the ancient store. 
@@ -232,7 +232,7 @@ type BlockChain interface { } // New creates a new downloader to fetch hashes and blocks from remote peers. -func New(stateDb ethdb.Database, mode ethconfig.SyncMode, chain BlockChain, dropPeer peerDropFn, success func()) *Downloader { +func New(stateDb ethdb.Database, mode ethconfig.SyncMode, chain BlockChain, dropPeer peerDropFn, success func(), snapV2 bool) *Downloader { cutoffNumber, cutoffHash := chain.HistoryPruningCutoff() dl := &Downloader{ stateDB: stateDb, @@ -245,10 +245,15 @@ func New(stateDb ethdb.Database, mode ethconfig.SyncMode, chain BlockChain, drop dropPeer: dropPeer, headerProcCh: make(chan *headerTask, 1), quitCh: make(chan struct{}), - SnapSyncer: snap.NewSyncer(stateDb, chain.TrieDB().Scheme()), stateSyncStart: make(chan *stateSync), syncStartBlock: chain.CurrentSnapBlock().Number.Uint64(), } + // Select the snap/1 or snap/2 state syncer based on the feature flag. + if snapV2 { + dl.snapSyncer = snap.NewV2Syncer(stateDb, chain.TrieDB().Scheme()) + } else { + dl.snapSyncer = snap.NewV1Syncer(stateDb, chain.TrieDB().Scheme()) + } // Create the post-merge skeleton syncer and start the process dl.skeleton = newSkeleton(stateDb, dl.peers, dropPeer, newBeaconBackfiller(dl, success), chain) @@ -278,7 +283,7 @@ func (d *Downloader) Progress() ethereum.SyncProgress { default: log.Error("Unknown downloader mode", "mode", mode) } - progress, pending := d.SnapSyncer.Progress() + progress := d.snapSyncer.Progress() return ethereum.SyncProgress{ StartingBlock: d.syncStatsChainOrigin, @@ -294,8 +299,8 @@ func (d *Downloader) Progress() ethereum.SyncProgress { HealedTrienodeBytes: uint64(progress.TrienodeHealBytes), HealedBytecodes: progress.BytecodeHealSynced, HealedBytecodeBytes: uint64(progress.BytecodeHealBytes), - HealingTrienodes: pending.TrienodeHeal, - HealingBytecode: pending.BytecodeHeal, + HealingTrienodes: progress.HealingTrienodes, + HealingBytecode: progress.HealingBytecode, } } @@ -889,7 +894,7 @@ func (d *Downloader) 
processSnapSyncContent() error { // Start syncing state of the reported head block. This should get us most of // the state of the pivot block. d.pivotLock.RLock() - sync := d.syncState(d.pivotHeader.Root) + sync := d.syncState(d.pivotHeader) d.pivotLock.RUnlock() defer func() { @@ -959,10 +964,9 @@ func (d *Downloader) processSnapSyncContent() error { if oldPivot == nil { // no results piling up, we can move the pivot if !d.committed.Load() { // not yet passed the pivot, we can move the pivot - if pivot.Root != sync.root { // pivot position changed, we can move the pivot + if pivot.Root != sync.pivot.Root { // pivot state root changed, we can move the pivot sync.Cancel() - sync = d.syncState(pivot.Root) - + sync = d.syncState(pivot) go closeOnErr(sync) } } @@ -977,8 +981,7 @@ func (d *Downloader) processSnapSyncContent() error { // If new pivot block found, cancel old state retrieval and restart if oldPivot != P { sync.Cancel() - sync = d.syncState(P.Header.Root) - + sync = d.syncState(P.Header) go closeOnErr(sync) oldPivot = P } @@ -1070,7 +1073,7 @@ func (d *Downloader) commitPivotBlock(result *fetchResult) error { if _, err := d.blockchain.InsertReceiptChain([]*types.Block{block}, []rlp.RawValue{result.Receipts}, d.ancientLimit); err != nil { return err } - if err := d.blockchain.SnapSyncComplete(block.Hash()); err != nil { + if err := d.blockchain.SnapSyncComplete(block.Hash(), d.snapSyncer.Version() == snap.SNAP2); err != nil { return err } d.committed.Store(true) @@ -1086,23 +1089,46 @@ func (d *Downloader) DeliverSnapPacket(peer *snap.Peer, packet snap.Packet) erro if err != nil { return err } - return d.SnapSyncer.OnAccounts(peer, packet.ID, hashes, accounts, packet.Proof) + return d.snapSyncer.OnAccounts(peer, packet.ID, hashes, accounts, packet.Proof) case *snap.StorageRangesPacket: hashset, slotset := packet.Unpack() - return d.SnapSyncer.OnStorage(peer, packet.ID, hashset, slotset, packet.Proof) + return d.snapSyncer.OnStorage(peer, packet.ID, hashset, 
slotset, packet.Proof) case *snap.ByteCodesPacket: - return d.SnapSyncer.OnByteCodes(peer, packet.ID, packet.Codes) + return d.snapSyncer.OnByteCodes(peer, packet.ID, packet.Codes) case *snap.TrieNodesPacket: - return d.SnapSyncer.OnTrieNodes(peer, packet.ID, packet.Nodes) + return d.snapSyncer.OnTrieNodes(peer, packet.ID, packet.Nodes) + + case *snap.AccessListsPacket: + return d.snapSyncer.OnAccessLists(peer, packet.ID, packet.AccessLists) default: return fmt.Errorf("unexpected snap packet type: %T", packet) } } +// RegisterSnapPeer registers a snap peer with the active state syncer. Peers that +// negotiated a snap version below the syncer's minimum are skipped — e.g. the +// snap/2 syncer skips snap/1-only peers, which cannot answer its BAL requests. +func (d *Downloader) RegisterSnapPeer(p *snap.Peer) error { + if p.Version() < d.snapSyncer.Version() { + return nil + } + return d.snapSyncer.Register(p) +} + +// UnregisterSnapPeer removes a snap peer from the active state syncer. It mirrors +// RegisterSnapPeer's version gate: a peer below the active syncer's version was +// never registered, so there is nothing to remove. +func (d *Downloader) UnregisterSnapPeer(p *snap.Peer) error { + if p.Version() < d.snapSyncer.Version() { + return nil + } + return d.snapSyncer.Unregister(p.ID()) +} + // readHeaderRange returns a list of headers, using the given last header as the base, // and going backwards towards genesis. This method assumes that the caller already has // placed a reasonable cap on count. diff --git a/eth/downloader/downloader_test.go b/eth/downloader/downloader_test.go index e6c477cd33..de3a3f1991 100644 --- a/eth/downloader/downloader_test.go +++ b/eth/downloader/downloader_test.go @@ -52,8 +52,14 @@ func newTester(t *testing.T, mode ethconfig.SyncMode) *downloadTester { return newTesterWithNotification(t, mode, nil) } -// newTesterWithNotification creates a new downloader test mocker. 
+// newTesterWithNotification creates a new downloader test mocker (snap/1). func newTesterWithNotification(t *testing.T, mode ethconfig.SyncMode, success func()) *downloadTester { + return newTesterWithSnap(t, mode, success, false) +} + +// newTesterWithSnap is like newTesterWithNotification but selects the snap/2 +// state syncer when snapV2 is set. +func newTesterWithSnap(t *testing.T, mode ethconfig.SyncMode, success func(), snapV2 bool) *downloadTester { db, err := rawdb.Open(rawdb.NewMemoryDatabase(), rawdb.OpenOptions{}) if err != nil { panic(err) @@ -74,7 +80,7 @@ func newTesterWithNotification(t *testing.T, mode ethconfig.SyncMode, success fu chain: chain, peers: make(map[string]*downloadTesterPeer), } - tester.downloader = New(db, mode, tester.chain, tester.dropPeer, success) + tester.downloader = New(db, mode, tester.chain, tester.dropPeer, success, snapV2) return tester } @@ -102,7 +108,7 @@ func (dl *downloadTester) newPeer(id string, version uint, blocks []*types.Block if err := dl.downloader.RegisterPeer(id, version, peer); err != nil { panic(err) } - if err := dl.downloader.SnapSyncer.Register(peer); err != nil { + if err := dl.downloader.snapSyncer.Register(peer); err != nil { panic(err) } return peer @@ -114,7 +120,7 @@ func (dl *downloadTester) dropPeer(id string) { defer dl.lock.Unlock() delete(dl.peers, id) - dl.downloader.SnapSyncer.Unregister(id) + dl.downloader.snapSyncer.Unregister(id) dl.downloader.UnregisterPeer(id) } @@ -329,7 +335,7 @@ func (dlp *downloadTesterPeer) RequestAccountRange(id uint64, root, origin, limi } hashes, accounts, _ := res.Unpack() - go dlp.dl.downloader.SnapSyncer.OnAccounts(dlp, id, hashes, accounts, proofs) + go dlp.dl.downloader.snapSyncer.OnAccounts(dlp, id, hashes, accounts, proofs) return nil } @@ -356,7 +362,7 @@ func (dlp *downloadTesterPeer) RequestStorageRanges(id uint64, root common.Hash, } hashes, slots := res.Unpack() - go dlp.dl.downloader.SnapSyncer.OnStorage(dlp, id, hashes, slots, proofs) + go 
dlp.dl.downloader.snapSyncer.OnStorage(dlp, id, hashes, slots, proofs) return nil } @@ -368,11 +374,12 @@ func (dlp *downloadTesterPeer) RequestByteCodes(id uint64, hashes []common.Hash, Bytes: uint64(bytes), } codes := snap.ServiceGetByteCodesQuery(dlp.chain, req) - go dlp.dl.downloader.SnapSyncer.OnByteCodes(dlp, id, codes) + go dlp.dl.downloader.snapSyncer.OnByteCodes(dlp, id, codes) return nil } -// RequestTrieNodes fetches a batch of account or storage trie nodes. +// RequestTrieNodes fetches a batch of trie nodes (snap/1 healing). snap/2 never +// issues these, but the method is required to satisfy snap.SyncPeerV2. func (dlp *downloadTesterPeer) RequestTrieNodes(id uint64, root common.Hash, count int, paths []snap.TrieNodePathSet, bytes int) error { encPaths, err := rlp.EncodeToRawList(paths) if err != nil { @@ -385,7 +392,19 @@ func (dlp *downloadTesterPeer) RequestTrieNodes(id uint64, root common.Hash, cou Bytes: uint64(bytes), } nodes, _ := snap.ServiceGetTrieNodesQuery(dlp.chain, req) - go dlp.dl.downloader.SnapSyncer.OnTrieNodes(dlp, id, nodes) + go dlp.dl.downloader.snapSyncer.OnTrieNodes(dlp, id, nodes) + return nil +} + +// RequestAccessLists fetches a batch of BALs by block hash. 
+func (dlp *downloadTesterPeer) RequestAccessLists(id uint64, hashes []common.Hash, bytes int) error { + req := &snap.GetAccessListsPacket{ + ID: id, + Hashes: hashes, + Bytes: uint64(bytes), + } + als := snap.ServiceGetAccessListsQuery(dlp.chain, req) + go dlp.dl.downloader.snapSyncer.OnAccessLists(dlp, id, als) return nil } @@ -412,14 +431,15 @@ func assertOwnChain(t *testing.T, tester *downloadTester, length int) { } } -func TestCanonicalSynchronisationFull(t *testing.T) { testCanonSync(t, eth.ETH69, FullSync) } -func TestCanonicalSynchronisationSnap(t *testing.T) { testCanonSync(t, eth.ETH69, SnapSync) } +func TestCanonicalSynchronisationFull(t *testing.T) { testCanonSync(t, eth.ETH69, FullSync, false) } +func TestCanonicalSynchronisationSnap(t *testing.T) { testCanonSync(t, eth.ETH69, SnapSync, false) } +func TestCanonicalSynchronisationSnapV2(t *testing.T) { testCanonSync(t, eth.ETH69, SnapSync, true) } -func testCanonSync(t *testing.T, protocol uint, mode SyncMode) { +func testCanonSync(t *testing.T, protocol uint, mode SyncMode, snapV2 bool) { success := make(chan struct{}) - tester := newTesterWithNotification(t, mode, func() { + tester := newTesterWithSnap(t, mode, func() { close(success) - }) + }, snapV2) defer tester.terminate() // Create a small enough block chain to download diff --git a/eth/downloader/statesync.go b/eth/downloader/statesync.go index 501af63ed5..0220a3a9c4 100644 --- a/eth/downloader/statesync.go +++ b/eth/downloader/statesync.go @@ -19,14 +19,14 @@ package downloader import ( "sync" - "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/log" ) -// syncState starts downloading state with the given root hash. -func (d *Downloader) syncState(root common.Hash) *stateSync { +// syncState starts downloading state with the given pivot header. 
+func (d *Downloader) syncState(pivot *types.Header) *stateSync { // Create the state sync - s := newStateSync(d, root) + s := newStateSync(d, pivot) select { case d.stateSyncStart <- s: // If we tell the statesync to restart with a new root, we also need @@ -58,7 +58,7 @@ func (d *Downloader) stateFetcher() { // runStateSync runs a state synchronisation until it completes or another root // hash is requested to be switched over to. func (d *Downloader) runStateSync(s *stateSync) *stateSync { - log.Trace("State sync starting", "root", s.root) + log.Trace("State sync starting", "pivot", s.pivot.Hash(), "number", s.pivot.Number) go s.run() defer s.Cancel() @@ -75,10 +75,10 @@ func (d *Downloader) runStateSync(s *stateSync) *stateSync { } // stateSync schedules requests for downloading a particular state trie defined -// by a given state root. +// by a given pivot header. type stateSync struct { - d *Downloader // Downloader instance to access and manage current peerset - root common.Hash // State root currently being synced + d *Downloader // Downloader instance to access and manage current peerset + pivot *types.Header // Pivot header currently being synced started chan struct{} // Started is signalled once the sync loop starts cancel chan struct{} // Channel to signal a termination request @@ -89,10 +89,10 @@ type stateSync struct { // newStateSync creates a new state trie download scheduler. This method does not // yet start the sync. The user needs to call run to initiate. -func newStateSync(d *Downloader, root common.Hash) *stateSync { +func newStateSync(d *Downloader, pivot *types.Header) *stateSync { return &stateSync{ d: d, - root: root, + pivot: pivot, cancel: make(chan struct{}), done: make(chan struct{}), started: make(chan struct{}), @@ -104,7 +104,7 @@ func newStateSync(d *Downloader, root common.Hash) *stateSync { // finish. 
func (s *stateSync) run() { close(s.started) - s.err = s.d.SnapSyncer.Sync(s.root, s.cancel) + s.err = s.d.snapSyncer.Sync(s.pivot, s.cancel) close(s.done) } diff --git a/eth/ethconfig/config.go b/eth/ethconfig/config.go index b51b78e199..99a0fa89f7 100644 --- a/eth/ethconfig/config.go +++ b/eth/ethconfig/config.go @@ -184,6 +184,11 @@ type Config struct { // Enables tracking of state size EnableStateSizeTracking bool + // SnapV2 enables the experimental snap/2 (EIP-8189, BAL-based) sync protocol: + // the node advertises snap/2 on the wire and uses the snap/2 state syncer. + // It is not safe to enable on public networks yet. + SnapV2 bool + // Enables VM tracing VMTrace string VMTraceJsonConfig string diff --git a/eth/ethconfig/gen_config.go b/eth/ethconfig/gen_config.go index c5e45348be..61d6ccb168 100644 --- a/eth/ethconfig/gen_config.go +++ b/eth/ethconfig/gen_config.go @@ -57,6 +57,7 @@ func (c Config) MarshalTOML() (interface{}, error) { EnableWitnessStats bool StatelessSelfValidation bool EnableStateSizeTracking bool + SnapV2 bool VMTrace string VMTraceJsonConfig string RPCGasCap uint64 @@ -111,6 +112,7 @@ func (c Config) MarshalTOML() (interface{}, error) { enc.EnableWitnessStats = c.EnableWitnessStats enc.StatelessSelfValidation = c.StatelessSelfValidation enc.EnableStateSizeTracking = c.EnableStateSizeTracking + enc.SnapV2 = c.SnapV2 enc.VMTrace = c.VMTrace enc.VMTraceJsonConfig = c.VMTraceJsonConfig enc.RPCGasCap = c.RPCGasCap @@ -169,6 +171,7 @@ func (c *Config) UnmarshalTOML(unmarshal func(interface{}) error) error { EnableWitnessStats *bool StatelessSelfValidation *bool EnableStateSizeTracking *bool + SnapV2 *bool VMTrace *string VMTraceJsonConfig *string RPCGasCap *uint64 @@ -306,6 +309,9 @@ func (c *Config) UnmarshalTOML(unmarshal func(interface{}) error) error { if dec.EnableStateSizeTracking != nil { c.EnableStateSizeTracking = *dec.EnableStateSizeTracking } + if dec.SnapV2 != nil { + c.SnapV2 = *dec.SnapV2 + } if dec.VMTrace != nil { c.VMTrace = 
*dec.VMTrace } diff --git a/eth/handler.go b/eth/handler.go index 76df635fb0..3c5e122c80 100644 --- a/eth/handler.go +++ b/eth/handler.go @@ -108,6 +108,7 @@ type handlerConfig struct { Sync ethconfig.SyncMode // Whether to snap or full sync BloomCache uint64 // Megabytes to alloc for snap sync bloom RequiredBlocks map[uint64]common.Hash // Hard coded map of required block hashes for sync challenges + SnapV2 bool // Whether to advertise and sync via the snap/2 protocol } type handler struct { @@ -156,7 +157,7 @@ func newHandler(config *handlerConfig) (*handler, error) { handlerStartCh: make(chan struct{}), } // Construct the downloader (long sync) - h.downloader = downloader.New(config.Database, config.Sync, h.chain, h.removePeer, h.enableSyncedFeatures) + h.downloader = downloader.New(config.Database, config.Sync, h.chain, h.removePeer, h.enableSyncedFeatures, config.SnapV2) // If snap sync is requested but snapshots are disabled, fail loudly if h.downloader.ConfigSyncMode() == ethconfig.SnapSync && (config.Chain.Snapshots() == nil && config.Chain.TrieDB().Scheme() == rawdb.HashScheme) { @@ -278,7 +279,7 @@ func (h *handler) runEthPeer(peer *eth.Peer, handler eth.Handler) error { return err } if snap != nil { - if err := h.downloader.SnapSyncer.Register(snap); err != nil { + if err := h.downloader.RegisterSnapPeer(snap); err != nil { peer.Log().Error("Failed to register peer in snap syncer", "err", err) return err } @@ -392,7 +393,7 @@ func (h *handler) unregisterPeer(id string) { // Remove the `snap` extension if it exists if peer.snapExt != nil { - h.downloader.SnapSyncer.Unregister(id) + h.downloader.UnregisterSnapPeer(peer.snapExt.Peer) } h.downloader.UnregisterPeer(id) h.txFetcher.Drop(id) diff --git a/eth/protocols/snap/bal_apply.go b/eth/protocols/snap/bal_apply.go new file mode 100644 index 0000000000..3e565662e7 --- /dev/null +++ b/eth/protocols/snap/bal_apply.go @@ -0,0 +1,188 @@ +// Copyright 2026 The go-ethereum Authors +// This file is part of the 
go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snap + +import ( + "bytes" + "fmt" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/core/types/bal" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/rlp" + "github.com/holiman/uint256" +) + +// verifyAccessList checks that the given block access list matches the hash +// committed in the block header. +func verifyAccessList(b *bal.BlockAccessList, header *types.Header) error { + if header.BlockAccessListHash == nil { + return fmt.Errorf("header %d has no access list hash", header.Number) + } + have := b.Hash() + if have != *header.BlockAccessListHash { + return fmt.Errorf("access list hash mismatch for block %d: have %v, want %v", header.Number, have, *header.BlockAccessListHash) + } + return nil +} + +// isFetched tells us if accountHash has been downloaded. 
+func (s *syncerV2) isFetched(accountHash common.Hash) bool { + s.lock.RLock() + defer s.lock.RUnlock() + + for _, task := range s.tasks { + if bytes.Compare(accountHash[:], task.Last[:]) <= 0 { + return bytes.Compare(accountHash[:], task.Next[:]) < 0 + } + } + return true +} + +// isStorageFetched reports whether the specified storage slot has already +// been downloaded in a previous cycle. +func (s *syncerV2) isStorageFetched(accountHash, storageHash common.Hash) bool { + s.lock.RLock() + defer s.lock.RUnlock() + + for _, task := range s.tasks { + if bytes.Compare(accountHash[:], task.Last[:]) > 0 { + continue + } + // The account falls within a completed account range. + if bytes.Compare(accountHash[:], task.Next[:]) < 0 { + return true + } + // All storage for this account has been synchronized. + if _, ok := task.stateCompleted[accountHash]; ok { + return true + } + // No storage sync task exists for this account yet. + subtasks, ok := task.SubTasks[accountHash] + if !ok { + return false + } + // Check whether the slot falls within a completed storage subrange. + for _, sub := range subtasks { + if bytes.Compare(storageHash[:], sub.Last[:]) <= 0 { + return bytes.Compare(storageHash[:], sub.Next[:]) < 0 + } + } + return true // All storage subranges for this slot have been completed. + } + return true // The account belongs to a completed account range. +} + +// applyAccessList applies a single block's access list diffs to the flat state +// in the database. For each account, it applies the post-block values (highest +// TxIdx entry) for balance, nonce, code, and storage. The storageRoot field is +// intentionally left stale. It will be recomputed during the trie rebuild. 
+func (s *syncerV2) applyAccessList(b *bal.BlockAccessList, batch ethdb.Batch) error { + // Iterate over all accounts in the access list + for _, access := range *b { + addr := access.Address + accountHash := crypto.Keccak256Hash(addr[:]) + + for _, slotWrites := range access.StorageChanges { + if n := len(slotWrites.SlotChanges); n > 0 { + value := slotWrites.SlotChanges[n-1].PostValue + slotKey := slotWrites.Slot.Bytes32() + storageHash := crypto.Keccak256Hash(slotKey[:]) + + if !s.isStorageFetched(accountHash, storageHash) { + continue + } + if value.IsZero() { + rawdb.DeleteStorageSnapshot(batch, accountHash, storageHash) + } else { + // Store the slot in the same encoding the snapshot and the + // trie rebuild use: RLP of the minimal big-endian value + // (leading zeros trimmed), matching core/state's snapshot + // writes. + blob, _ := rlp.EncodeToBytes(value.Bytes()) + rawdb.WriteStorageSnapshot(batch, accountHash, storageHash, blob) + } + } + } + if !s.isFetched(accountHash) { + continue + } + // Read the existing account from flat state (may not exist yet) + var ( + account types.StateAccount + isNew bool + ) + if data := rawdb.ReadAccountSnapshot(s.db, accountHash); len(data) > 0 { + existing, err := types.FullAccount(data) + if err != nil { + return fmt.Errorf("failed to decode account %v: %w", addr, err) + } + account = *existing + } else { + // New account — initialize with defaults + isNew = true + account.Balance = new(uint256.Int) + account.Root = types.EmptyRootHash + account.CodeHash = types.EmptyCodeHash[:] + } + + // Apply balance change (last entry = post-block state) + if n := len(access.BalanceChanges); n > 0 { + account.Balance = new(uint256.Int).Set(access.BalanceChanges[n-1].PostBalance) + } + + // Apply nonce change (last entry = post-block state) + if n := len(access.NonceChanges); n > 0 { + account.Nonce = access.NonceChanges[n-1].PostNonce + } + + // Apply code change (last entry = post-block state) + if n := len(access.CodeChanges); n 
> 0 { + code := access.CodeChanges[n-1].NewCode + if len(code) > 0 { + codeHash := crypto.Keccak256(code) + rawdb.WriteCode(batch, common.BytesToHash(codeHash), code) + account.CodeHash = codeHash + } else { + account.CodeHash = types.EmptyCodeHash[:] + } + } + + // Don't create empty accounts in flat state (EIP-161). + isEmpty := account.Balance.IsZero() && account.Nonce == 0 && + bytes.Equal(account.CodeHash, types.EmptyCodeHash[:]) + switch { + case isEmpty && isNew: + // This covers cases where an account is created and destroyed within the + // same transaction, or where its net state change across the block is zero. + // The empty -> empty transition should be excluded from account update. + case isEmpty && !isNew: + // Existing account got fully drained (e.g., pre-funded + // address that gets deployed to with init code that + // self-destructs). Delete the entry so the trie rebuild + // doesn't pick it up as an empty leaf. + rawdb.DeleteAccountSnapshot(batch, accountHash) + default: + // Write the updated account (storageRoot intentionally left stale) + rawdb.WriteAccountSnapshot(batch, accountHash, types.SlimAccountRLP(account)) + } + } + return nil +} diff --git a/eth/protocols/snap/bal_apply_test.go b/eth/protocols/snap/bal_apply_test.go new file mode 100644 index 0000000000..a9e7f789a5 --- /dev/null +++ b/eth/protocols/snap/bal_apply_test.go @@ -0,0 +1,591 @@ +// Copyright 2026 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snap + +import ( + "bytes" + "math/big" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/core/types/bal" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/rlp" + "github.com/holiman/uint256" +) + +// buildTestBAL constructs a BlockAccessList from a ConstructionBlockAccessList +// by RLP round-tripping (construction types use unexported encoding types). +func buildTestBAL(t *testing.T, cb *bal.ConstructionBlockAccessList) *bal.BlockAccessList { + t.Helper() + var buf bytes.Buffer + if err := cb.EncodeRLP(&buf); err != nil { + t.Fatalf("failed to encode BAL: %v", err) + } + var b bal.BlockAccessList + if err := rlp.DecodeBytes(buf.Bytes(), &b); err != nil { + t.Fatalf("failed to decode BAL: %v", err) + } + return &b +} + +// applyBAL applies b to the syncer's flat state and commits it, mirroring the +// per-block batch flow used during catch-up: applyAccessList writes into a batch +// that the caller commits. +func applyBAL(t *testing.T, s *syncerV2, b *bal.BlockAccessList) { + t.Helper() + batch := s.db.NewBatch() + if err := s.applyAccessList(b, batch); err != nil { + t.Fatalf("applyAccessList failed: %v", err) + } + if err := batch.Write(); err != nil { + t.Fatalf("failed to commit BAL batch: %v", err) + } +} + +// TestAccessListVerification checks that verifyAccessList accepts valid BALs +// and rejects tampered ones. 
+func TestAccessListVerification(t *testing.T) { + t.Parallel() + + cb := bal.NewConstructionBlockAccessList() + addr := common.HexToAddress("0x01") + cb.BalanceChange(0, addr, uint256.NewInt(100)) + + b := buildTestBAL(t, cb) + correctHash := b.Hash() + + // Valid: hash matches header + header := &types.Header{ + Number: big.NewInt(1), + BlockAccessListHash: &correctHash, + } + if err := verifyAccessList(b, header); err != nil { + t.Fatalf("valid access list rejected: %v", err) + } + // Invalid: wrong hash in header + wrongHash := common.HexToHash("0xdead") + badHeader := &types.Header{ + Number: big.NewInt(1), + BlockAccessListHash: &wrongHash, + } + if err := verifyAccessList(b, badHeader); err == nil { + t.Fatal("tampered access list accepted") + } + // Invalid: no hash in header + noHashHeader := &types.Header{ + Number: big.NewInt(1), + } + if err := verifyAccessList(b, noHashHeader); err == nil { + t.Fatal("header without access list hash accepted") + } +} + +// TestAccessListApplication verifies that applyAccessList correctly updates +// flat state (balance, nonce, code, storage) and leaves storageRoot stale. +func TestAccessListApplication(t *testing.T) { + t.Parallel() + db := rawdb.NewMemoryDatabase() + syncer := newSyncerV2(db, rawdb.HashScheme) + addr := common.HexToAddress("0x01") + accountHash := crypto.Keccak256Hash(addr[:]) + + // Write an existing account to flat state + original := types.StateAccount{ + Nonce: 5, + Balance: uint256.NewInt(1000), + Root: common.HexToHash("0xbeef"), // intentionally non-empty + CodeHash: types.EmptyCodeHash[:], + } + rawdb.WriteAccountSnapshot(db, accountHash, types.SlimAccountRLP(original)) + + // Write an existing storage slot. The BAL uses raw slot keys, but the + // snapshot layer stores slots under keccak256(slot). 
+ rawSlot := common.HexToHash("0xaa") + slotHash := crypto.Keccak256Hash(rawSlot[:]) + rawdb.WriteStorageSnapshot(db, accountHash, slotHash, common.HexToHash("0x01").Bytes()) + + // Build a BAL that changes balance, nonce, code, and storage + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, addr, uint256.NewInt(2000)) + cb.NonceChange(addr, 0, 6) + cb.CodeChange(addr, 0, []byte{0x60, 0x00}) // PUSH1 0x00 + cb.StorageWrite(0, addr, rawSlot, common.HexToHash("0x02")) + b := buildTestBAL(t, cb) + applyBAL(t, syncer, b) + + // Verify account fields updated + data := rawdb.ReadAccountSnapshot(db, accountHash) + if len(data) == 0 { + t.Fatal("account snapshot missing after apply") + } + updated, err := types.FullAccount(data) + if err != nil { + t.Fatalf("failed to decode updated account: %v", err) + } + if updated.Balance.Cmp(uint256.NewInt(2000)) != 0 { + t.Errorf("balance wrong: got %v, want 2000", updated.Balance) + } + if updated.Nonce != 6 { + t.Errorf("nonce wrong: got %d, want 6", updated.Nonce) + } + wantCodeHash := crypto.Keccak256([]byte{0x60, 0x00}) + if !bytes.Equal(updated.CodeHash, wantCodeHash) { + t.Errorf("code hash wrong: got %x, want %x", updated.CodeHash, wantCodeHash) + } + + // Verify code was written + if code := rawdb.ReadCode(db, common.BytesToHash(wantCodeHash)); !bytes.Equal(code, []byte{0x60, 0x00}) { + t.Errorf("code wrong: got %x, want 6000", code) + } + + // Verify storage updated. Slots are stored in the canonical snapshot + // encoding (RLP of the value with leading zeros trimmed), the same form + // the download path writes and the trie rebuild consumes. 
+ storageVal := rawdb.ReadStorageSnapshot(db, accountHash, slotHash) + wantStorage, _ := rlp.EncodeToBytes(common.TrimLeftZeroes(common.HexToHash("0x02").Bytes())) + if !bytes.Equal(storageVal, wantStorage) { + t.Errorf("storage wrong: got %x, want %x", storageVal, wantStorage) + } + + // Verify storageRoot left stale (unchanged from original) + if updated.Root != original.Root { + t.Errorf("storageRoot should be stale: got %v, want %v", updated.Root, original.Root) + } +} + +// TestAccessListApplicationMultiTx verifies that when an account has multiple +// changes at different transaction indices, only the highest index (post-block +// state) is applied. +func TestAccessListApplicationMultiTx(t *testing.T) { + t.Parallel() + db := rawdb.NewMemoryDatabase() + syncer := newSyncerV2(db, rawdb.HashScheme) + addr := common.HexToAddress("0x02") + accountHash := crypto.Keccak256Hash(addr[:]) + + // Write initial account + original := types.StateAccount{ + Nonce: 0, + Balance: uint256.NewInt(100), + Root: types.EmptyRootHash, + CodeHash: types.EmptyCodeHash[:], + } + rawdb.WriteAccountSnapshot(db, accountHash, types.SlimAccountRLP(original)) + + // Build BAL with multiple balance/nonce changes at different tx indices + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, addr, uint256.NewInt(200)) // tx 0 + cb.BalanceChange(3, addr, uint256.NewInt(500)) // tx 3 + cb.BalanceChange(7, addr, uint256.NewInt(9999)) // tx 7 (final) + cb.NonceChange(addr, 0, 1) // tx 0 + cb.NonceChange(addr, 3, 2) // tx 3 + cb.NonceChange(addr, 7, 3) // tx 7 (final) + b := buildTestBAL(t, cb) + applyBAL(t, syncer, b) + data := rawdb.ReadAccountSnapshot(db, accountHash) + updated, err := types.FullAccount(data) + if err != nil { + t.Fatalf("failed to decode updated account: %v", err) + } + + // Only the highest tx index values should be applied + if updated.Balance.Cmp(uint256.NewInt(9999)) != 0 { + t.Errorf("balance wrong: got %v, want 9999", updated.Balance) + } + if updated.Nonce 
!= 3 { + t.Errorf("nonce wrong: got %d, want 3", updated.Nonce) + } +} + +// TestAccessListApplicationZeroStorage verifies that a BAL slot write with a +// zero post-value deletes the snapshot entry instead of writing 32 zero +// bytes. +func TestAccessListApplicationZeroStorage(t *testing.T) { + t.Parallel() + db := rawdb.NewMemoryDatabase() + syncer := newSyncerV2(db, rawdb.HashScheme) + addr := common.HexToAddress("0x06") + accountHash := crypto.Keccak256Hash(addr[:]) + + // Existing account with a non-zero storage slot. + original := types.StateAccount{ + Nonce: 1, + Balance: uint256.NewInt(1), + Root: types.EmptyRootHash, + CodeHash: types.EmptyCodeHash[:], + } + rawdb.WriteAccountSnapshot(db, accountHash, types.SlimAccountRLP(original)) + rawSlot := common.HexToHash("0xaa") + slotHash := crypto.Keccak256Hash(rawSlot[:]) + rawdb.WriteStorageSnapshot(db, accountHash, slotHash, common.HexToHash("0x42").Bytes()) + + // BAL writes the slot to zero (deletion). + cb := bal.NewConstructionBlockAccessList() + cb.StorageWrite(0, addr, rawSlot, common.Hash{}) + b := buildTestBAL(t, cb) + applyBAL(t, syncer, b) + + if val := rawdb.ReadStorageSnapshot(db, accountHash, slotHash); len(val) != 0 { + t.Errorf("zeroed slot should have been deleted, got %x", val) + } +} + +// TestAccessListApplicationNewAccount verifies that applyAccessList creates +// new accounts that don't exist in the DB yet. +func TestAccessListApplicationNewAccount(t *testing.T) { + t.Parallel() + + db := rawdb.NewMemoryDatabase() + syncer := newSyncerV2(db, rawdb.HashScheme) + + addr := common.HexToAddress("0x03") + accountHash := crypto.Keccak256Hash(addr[:]) + + // Verify account doesn't exist + if data := rawdb.ReadAccountSnapshot(db, accountHash); len(data) > 0 { + t.Fatal("account should not exist yet") + } + + // Build BAL for a new account. BAL uses raw slot keys. 
+ cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, addr, uint256.NewInt(42)) + cb.NonceChange(addr, 0, 1) + rawSlot := common.HexToHash("0xbb") + cb.StorageWrite(0, addr, rawSlot, common.HexToHash("0xff")) + b := buildTestBAL(t, cb) + applyBAL(t, syncer, b) + + // Verify account was created + data := rawdb.ReadAccountSnapshot(db, accountHash) + if len(data) == 0 { + t.Fatal("account should exist after apply") + } + account, err := types.FullAccount(data) + if err != nil { + t.Fatalf("failed to decode new account: %v", err) + } + if account.Balance.Cmp(uint256.NewInt(42)) != 0 { + t.Errorf("balance wrong: got %v, want 42", account.Balance) + } + if account.Nonce != 1 { + t.Errorf("nonce wrong: got %d, want 1", account.Nonce) + } + if account.Root != types.EmptyRootHash { + t.Errorf("root should be empty for new account: got %v", account.Root) + } + + // Verify storage was written under keccak256(rawSlot) in the canonical + // snapshot encoding (RLP of the value with leading zeros trimmed). + slotHash := crypto.Keccak256Hash(rawSlot[:]) + storageVal := rawdb.ReadStorageSnapshot(db, accountHash, slotHash) + wantStorage, _ := rlp.EncodeToBytes(common.TrimLeftZeroes(common.HexToHash("0xff").Bytes())) + if !bytes.Equal(storageVal, wantStorage) { + t.Errorf("storage wrong: got %x, want %x", storageVal, wantStorage) + } +} + +// TestAccessListApplicationSkipsUnfetched verifies that applyAccessList does +// not write account entries for addresses whose hash falls in a range that +// hasn't been downloaded yet. +func TestAccessListApplicationSkipsUnfetched(t *testing.T) { + t.Parallel() + db := rawdb.NewMemoryDatabase() + syncer := newSyncerV2(db, rawdb.HashScheme) + + // Pick two addresses and order them by hash. 
+ addrA := common.HexToAddress("0x01") + addrB := common.HexToAddress("0x02") + hashA := crypto.Keccak256Hash(addrA[:]) + hashB := crypto.Keccak256Hash(addrB[:]) + fetchedAddr, fetchedHash := addrA, hashA + unfetchedAddr, unfetchedHash := addrB, hashB + if bytes.Compare(hashA[:], hashB[:]) > 0 { + fetchedAddr, fetchedHash = addrB, hashB + unfetchedAddr, unfetchedHash = addrA, hashA + } + + // One remaining task covering [unfetchedHash, MaxHash]: the fetched hash + // is below Next so isFetched returns true; the unfetched hash equals Next + // so isFetched returns false. + syncer.tasks = []*accountTaskV2{{ + Next: unfetchedHash, + Last: common.MaxHash, + SubTasks: make(map[common.Hash][]*storageTaskV2), + stateCompleted: make(map[common.Hash]struct{}), + }} + + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, fetchedAddr, uint256.NewInt(100)) + cb.BalanceChange(0, unfetchedAddr, uint256.NewInt(200)) + b := buildTestBAL(t, cb) + + applyBAL(t, syncer, b) + + // The fetched account should have been written. + if data := rawdb.ReadAccountSnapshot(db, fetchedHash); len(data) == 0 { + t.Error("expected fetched account to be written") + } + // The unfetched account should not have been touched. + if data := rawdb.ReadAccountSnapshot(db, unfetchedHash); len(data) != 0 { + t.Errorf("unfetched account should not be written, got %x", data) + } +} + +// TestAccessListApplicationSkipsUnfetchedStorage verifies that storage writes +// are also skipped when the parent account's hash range isn't downloaded yet. 
+func TestAccessListApplicationSkipsUnfetchedStorage(t *testing.T) { + t.Parallel() + db := rawdb.NewMemoryDatabase() + syncer := newSyncerV2(db, rawdb.HashScheme) + + addrA := common.HexToAddress("0x01") + addrB := common.HexToAddress("0x02") + hashA := crypto.Keccak256Hash(addrA[:]) + hashB := crypto.Keccak256Hash(addrB[:]) + + unfetchedAddr, unfetchedHash := addrB, hashB + if bytes.Compare(hashA[:], hashB[:]) > 0 { + unfetchedAddr, unfetchedHash = addrA, hashA + } + + syncer.tasks = []*accountTaskV2{{ + Next: unfetchedHash, + Last: common.MaxHash, + SubTasks: make(map[common.Hash][]*storageTaskV2), + stateCompleted: make(map[common.Hash]struct{}), + }} + + // BAL touches an unfetched account with a storage write AND an empty + // balance mutation. Neither should result in any flat-state writes. + rawSlot := common.HexToHash("0xaa") + slotHash := crypto.Keccak256Hash(rawSlot[:]) + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, unfetchedAddr, uint256.NewInt(0)) // empty mutation + cb.StorageWrite(0, unfetchedAddr, rawSlot, common.HexToHash("0xff")) + b := buildTestBAL(t, cb) + + applyBAL(t, syncer, b) + + if data := rawdb.ReadAccountSnapshot(db, unfetchedHash); len(data) != 0 { + t.Errorf("unfetched account should not be written, got %x", data) + } + if val := rawdb.ReadStorageSnapshot(db, unfetchedHash, slotHash); len(val) != 0 { + t.Errorf("storage for unfetched account should not be written, got %x", val) + } +} + +// TestAccessListApplicationPartialStorage verifies that for a large contract +// whose account hasn't been committed yet but whose storage is partially downloaded, +// applyAccessList rolls forward the slots below the active subtask frontier +// while skipping the ones above it and the account-level fields. 
+func TestAccessListApplicationPartialStorage(t *testing.T) { + t.Parallel() + db := rawdb.NewMemoryDatabase() + syncer := newSyncerV2(db, rawdb.HashScheme) + + addr := common.HexToAddress("0xc0") + accountHash := crypto.Keccak256Hash(addr[:]) + + // Two slots, ordered by their storage hash. The subtask frontier sits at + // the higher one so the lower slot is fetched and the higher is not. + loRaw := common.HexToHash("0xaa") + hiRaw := common.HexToHash("0xbb") + loHash := crypto.Keccak256Hash(loRaw[:]) + hiHash := crypto.Keccak256Hash(hiRaw[:]) + if bytes.Compare(loHash[:], hiHash[:]) > 0 { + loRaw, hiRaw = hiRaw, loRaw + loHash, hiHash = hiHash, loHash + } + + // The account sits exactly at Next (held back behind storage retrieval), so + // isFetched returns false. Its subtask has fetched everything below hiHash. + syncer.tasks = []*accountTaskV2{{ + Next: accountHash, + Last: common.MaxHash, + SubTasks: map[common.Hash][]*storageTaskV2{ + accountHash: {{ + Next: hiHash, + Last: common.MaxHash, + }}, + }, + stateCompleted: make(map[common.Hash]struct{}), + }} + + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, addr, uint256.NewInt(500)) // account update must be skipped + cb.StorageWrite(0, addr, loRaw, common.HexToHash("0x11")) + cb.StorageWrite(0, addr, hiRaw, common.HexToHash("0x22")) + b := buildTestBAL(t, cb) + + applyBAL(t, syncer, b) + + // Account must not be written: it's still being filled. + if data := rawdb.ReadAccountSnapshot(db, accountHash); len(data) != 0 { + t.Errorf("account below Next should not be written, got %x", data) + } + // Slot below the frontier must be rolled forward. + wantLo, _ := rlp.EncodeToBytes(common.TrimLeftZeroes(common.HexToHash("0x11").Bytes())) + if val := rawdb.ReadStorageSnapshot(db, accountHash, loHash); !bytes.Equal(val, wantLo) { + t.Errorf("fetched slot wrong: got %x, want %x", val, wantLo) + } + // Slot above the frontier must be skipped. 
+ if val := rawdb.ReadStorageSnapshot(db, accountHash, hiHash); len(val) != 0 { + t.Errorf("unfetched slot should not be written, got %x", val) + } +} + +func TestIsStorageFetched(t *testing.T) { + t.Parallel() + db := rawdb.NewMemoryDatabase() + syncer := newSyncerV2(db, rawdb.HashScheme) + + var ( + fetchedAcct = common.HexToHash("0x10") // below Next + fillingAcct = common.HexToHash("0x40") // == Next + beyondAcct = common.HexToHash("0x90") // above Last + ) + prunedHiAcct := common.HexToHash("0x70") // in [Next, Last], still filling + completeAcct := common.HexToHash("0x71") // in [Next, Last], storage is complete + + syncer.tasks = []*accountTaskV2{{ + Next: fillingAcct, + Last: common.HexToHash("0x80"), + SubTasks: map[common.Hash][]*storageTaskV2{ + fillingAcct: {{ + Next: common.HexToHash("0x50"), + Last: common.MaxHash, + }}, + prunedHiAcct: {{ + Next: common.HexToHash("0x30"), + Last: common.HexToHash("0x60"), + }}, + }, + stateCompleted: map[common.Hash]struct{}{ + completeAcct: {}, + }, + }} + + noSubAcct := common.HexToHash("0x60") // in [Next,Last] but no subtasks yet + tests := []struct { + name string + account common.Hash + slot common.Hash + want bool + }{ + {"account fully synced", fetchedAcct, common.HexToHash("0xff"), true}, + {"storage fully synced", completeAcct, common.HexToHash("0xff"), true}, + + {"account before all tasks", common.HexToHash("0x01"), common.HexToHash("0xff"), true}, + {"account beyond all tasks", beyondAcct, common.HexToHash("0xff"), true}, + + {"slot below storage frontier", fillingAcct, common.HexToHash("0x20"), true}, + {"slot at storage frontier", fillingAcct, common.HexToHash("0x50"), false}, + {"slot above storage frontier", fillingAcct, common.HexToHash("0x70"), false}, + {"account filling, no subtasks", noSubAcct, common.HexToHash("0x01"), false}, + + {"slot in pruned low range", prunedHiAcct, common.HexToHash("0x10"), true}, + {"slot at remaining frontier", prunedHiAcct, common.HexToHash("0x30"), false}, + {"slot 
within remaining range", prunedHiAcct, common.HexToHash("0x50"), false}, + {"slot in pruned high range", prunedHiAcct, common.HexToHash("0x90"), true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := syncer.isStorageFetched(tt.account, tt.slot); got != tt.want { + t.Errorf("isStorageFetched(%v, %v) = %v, want %v", tt.account, tt.slot, got, tt.want) + } + }) + } +} + +// TestAccessListApplicationSameTxCreateDestroy tests the edge case where an +// account is created and self-destructed in the same transaction during the +// pivot gap. Per EIP-7928, such accounts appear in the BAL with a balance +// change to zero but no nonce or code changes. Since the account didn't exist +// at the old pivot and doesn't exist at the new pivot (destroyed), +// applyAccessList should not leave a zero-balance account in the snapshot. +// Per EIP-161, empty accounts (zero balance, zero nonce, no code) must not exist +// in state. +func TestAccessListApplicationSameTxCreateDestroy(t *testing.T) { + t.Parallel() + db := rawdb.NewMemoryDatabase() + syncer := newSyncerV2(db, rawdb.HashScheme) + addr := common.HexToAddress("0x04") + accountHash := crypto.Keccak256Hash(addr[:]) + + // Verify account doesn't exist before apply + if data := rawdb.ReadAccountSnapshot(db, accountHash); len(data) > 0 { + t.Fatal("account should not exist yet") + } + + // Build a BAL mimicking same-tx create+destroy: the account appears + // with a balance change to zero and nothing else. + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, addr, uint256.NewInt(0)) + b := buildTestBAL(t, cb) + applyBAL(t, syncer, b) + + // Check if applyAccessList created an account. 
+ data := rawdb.ReadAccountSnapshot(db, accountHash) + if len(data) > 0 { + // Account was created + account, err := types.FullAccount(data) + if err != nil { + t.Fatalf("failed to decode account: %v", err) + } + t.Errorf("account created for same-tx create+destroy: "+ + "balance=%v, nonce=%d, codeHash=%x, root=%v", + account.Balance, account.Nonce, account.CodeHash, account.Root) + } +} + +// TestAccessListApplicationDestroyExisting verifies that when a BAL reduces +// an existing flat-state account to nonce=0, balance=0, empty code (the +// pre-funded destruction pattern), applyAccessList deletes the entry rather +// than leaving it zeroed. +func TestAccessListApplicationDestroyExisting(t *testing.T) { + t.Parallel() + db := rawdb.NewMemoryDatabase() + syncer := newSyncerV2(db, rawdb.HashScheme) + addr := common.HexToAddress("0x05") + accountHash := crypto.Keccak256Hash(addr[:]) + + // Pre-funded account: has balance, no nonce, no code. + original := types.StateAccount{ + Nonce: 0, + Balance: uint256.NewInt(1000), + Root: types.EmptyRootHash, + CodeHash: types.EmptyCodeHash[:], + } + rawdb.WriteAccountSnapshot(db, accountHash, types.SlimAccountRLP(original)) + + // The BAL zeros the balance. Nonce and code were already empty, so + // the account ends up fully empty after applying. 
+ cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, addr, uint256.NewInt(0)) + b := buildTestBAL(t, cb) + applyBAL(t, syncer, b) + + if data := rawdb.ReadAccountSnapshot(db, accountHash); len(data) != 0 { + account, _ := types.FullAccount(data) + t.Errorf("destroyed account should have been deleted from flat state, "+ + "got balance=%v, nonce=%d, codeHash=%x", + account.Balance, account.Nonce, account.CodeHash) + } +} diff --git a/eth/protocols/snap/handler.go b/eth/protocols/snap/handler.go index 26545f2960..7adff5dc0f 100644 --- a/eth/protocols/snap/handler.go +++ b/eth/protocols/snap/handler.go @@ -79,10 +79,16 @@ type Backend interface { Handle(peer *Peer, packet Packet) error } -// MakeProtocols constructs the P2P protocol definitions for `snap`. -func MakeProtocols(backend Backend) []p2p.Protocol { - protocols := make([]p2p.Protocol, len(ProtocolVersions)) - for i, version := range ProtocolVersions { +// MakeProtocols constructs the P2P protocol definitions for `snap`. When snapV2 +// is set, the snap/2 version is advertised in addition to the default versions; +// otherwise only the default (snap/1) versions are offered on the wire. +func MakeProtocols(backend Backend, snapV2 bool) []p2p.Protocol { + versions := ProtocolVersions + if snapV2 { + versions = append([]uint{SNAP2}, versions...) 
+ } + protocols := make([]p2p.Protocol, len(versions)) + for i, version := range versions { protocols[i] = p2p.Protocol{ Name: ProtocolName, Version: version, @@ -132,7 +138,6 @@ var snap1 = map[uint64]msgHandler{ TrieNodesMsg: handleTrieNodes, } -// nolint:unused var snap2 = map[uint64]msgHandler{ GetAccountRangeMsg: handleGetAccountRange, AccountRangeMsg: handleAccountRange, @@ -141,7 +146,7 @@ var snap2 = map[uint64]msgHandler{ GetByteCodesMsg: handleGetByteCodes, ByteCodesMsg: handleByteCodes, GetAccessListsMsg: handleGetAccessLists, - // AccessListsMsg: TODO + AccessListsMsg: handleAccessLists, } // HandleMessage is invoked whenever an inbound message is received from a @@ -162,8 +167,8 @@ func HandleMessage(backend Backend, peer *Peer) error { switch peer.version { case SNAP1: handlers = snap1 - //case SNAP2: - // handlers = snap2 + case SNAP2: + handlers = snap2 default: return fmt.Errorf("unknown eth protocol version: %v", peer.version) } diff --git a/eth/protocols/snap/handlers.go b/eth/protocols/snap/handlers.go index 89c6c52f7f..106322de90 100644 --- a/eth/protocols/snap/handlers.go +++ b/eth/protocols/snap/handlers.go @@ -553,7 +553,6 @@ func handleTrieNodes(backend Backend, msg Decoder, peer *Peer) error { return backend.Handle(peer, &TrieNodesPacket{res.ID, nodes}) } -// nolint:unused func handleGetAccessLists(backend Backend, msg Decoder, peer *Peer) error { var req GetAccessListsPacket if err := msg.Decode(&req); err != nil { @@ -598,3 +597,15 @@ func ServiceGetAccessListsQuery(chain *core.BlockChain, req *GetAccessListsPacke } return response } + +func handleAccessLists(backend Backend, msg Decoder, peer *Peer) error { + res := new(AccessListsPacket) + if err := msg.Decode(res); err != nil { + return fmt.Errorf("%w: message %v: %v", errDecode, msg, err) + } + tresp := tracker.Response{ID: res.ID, MsgCode: AccessListsMsg, Size: res.AccessLists.Len()} + if err := peer.tracker.Fulfil(tresp); err != nil { + return fmt.Errorf("BALs: %w", err) + } + 
return backend.Handle(peer, res) +} diff --git a/eth/protocols/snap/peer.go b/eth/protocols/snap/peer.go index 0b96de4158..b46c934c06 100644 --- a/eth/protocols/snap/peer.go +++ b/eth/protocols/snap/peer.go @@ -155,7 +155,7 @@ func (p *Peer) RequestByteCodes(id uint64, hashes []common.Hash, bytes int) erro } // RequestTrieNodes fetches a batch of account or storage trie nodes rooted in -// a specific state trie. The `count` is the total count of paths being requested. +// a specific state trie, or off of a specific account. The `count` is the total count of paths being requested. func (p *Peer) RequestTrieNodes(id uint64, root common.Hash, count int, paths []TrieNodePathSet, bytes int) error { p.logger.Trace("Fetching set of trie nodes", "reqid", id, "root", root, "pathsets", len(paths), "bytes", common.StorageSize(bytes)) @@ -176,3 +176,22 @@ func (p *Peer) RequestTrieNodes(id uint64, root common.Hash, count int, paths [] Bytes: uint64(bytes), }) } + +// RequestAccessLists fetches a batch of BALs by block hash. +func (p *Peer) RequestAccessLists(id uint64, hashes []common.Hash, bytes int) error { + p.logger.Trace("Fetching set of BALs", "reqid", id, "hashes", len(hashes), "bytes", common.StorageSize(bytes)) + err := p.tracker.Track(tracker.Request{ + ReqCode: GetAccessListsMsg, + RespCode: AccessListsMsg, + ID: id, + Size: len(hashes), + }) + if err != nil { + return err + } + return p2p.Send(p.rw, GetAccessListsMsg, &GetAccessListsPacket{ + ID: id, + Hashes: hashes, + Bytes: uint64(bytes), + }) +} diff --git a/eth/protocols/snap/progress_test.go b/eth/protocols/snap/progress_test.go index 1d9a6b8474..21ec36c2b5 100644 --- a/eth/protocols/snap/progress_test.go +++ b/eth/protocols/snap/progress_test.go @@ -18,9 +18,12 @@ package snap import ( "encoding/json" + "math/big" "testing" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" ) // Legacy sync progress definitions @@ -39,7 +42,7 @@ type legacyProgress struct { Tasks 
[]*legacyAccountTask // The suspended account tasks (contract tasks within) } -func compareProgress(a legacyProgress, b SyncProgress) bool { +func compareProgress(a legacyProgress, b syncProgress) bool { if len(a.Tasks) != len(b.Tasks) { return false } @@ -96,8 +99,8 @@ func makeLegacyProgress() legacyProgress { } } -func convertLegacy(legacy legacyProgress) SyncProgress { - var progress SyncProgress +func convertLegacy(legacy legacyProgress) syncProgress { + var progress syncProgress for i, task := range legacy.Tasks { subTasks := make(map[common.Hash][]*storageTask) for owner, list := range task.SubTasks { @@ -130,7 +133,7 @@ func TestSyncProgressCompatibility(t *testing.T) { if err != nil { t.Fatalf("Failed to marshal progress %v", err) } - var dec SyncProgress + var dec syncProgress if err := json.Unmarshal(blob, &dec); err != nil { t.Fatalf("Failed to unmarshal progress %v", err) } @@ -152,3 +155,116 @@ func TestSyncProgressCompatibility(t *testing.T) { t.Fatal("sync progress is not forward compatible") } } + +// TestSyncProgressV1Discarded verifies that a persisted blob written in the +// old unversioned format (raw JSON, no version prefix) is detected and +// discarded on load, that the syncer falls through to a fresh start, and +// that any orphan flat-state entries from the prior format are wiped. +func TestSyncProgressV1Discarded(t *testing.T) { + db := rawdb.NewMemoryDatabase() + + // Write a raw JSON blob (no version byte) to simulate progress persisted + // by a prior geth binary (snap/1 format). + legacy := map[string]any{ + "Root": common.HexToHash("0xaaaa"), + "BlockNumber": uint64(42), + "Tasks": []any{}, + } + blob, err := json.Marshal(legacy) + if err != nil { + t.Fatalf("marshal legacy: %v", err) + } + rawdb.WriteSnapshotSyncStatus(db, blob) + + // Pre-write orphan flat-state entries that should be wiped on fresh start. 
+ orphanAccountHash := common.HexToHash("0xdeadbeef") + rawdb.WriteAccountSnapshot(db, orphanAccountHash, []byte{0xde, 0xad}) + orphanStorageAccount := common.HexToHash("0xfeedface") + orphanStorageSlot := common.HexToHash("0xabcd") + rawdb.WriteStorageSnapshot(db, orphanStorageAccount, orphanStorageSlot, []byte{0xff, 0xff}) + + syncer := newSyncerV2(db, rawdb.HashScheme) + syncer.loadSyncStatus() + + if syncer.previousPivot != nil { + t.Fatalf("expected previousPivot nil after discarding old format, got %+v", syncer.previousPivot) + } + if len(syncer.tasks) != accountConcurrency { + t.Fatalf("expected fresh task split of %d, got %d", accountConcurrency, len(syncer.tasks)) + } + if data := rawdb.ReadAccountSnapshot(db, orphanAccountHash); len(data) != 0 { + t.Errorf("orphan account snapshot should be wiped, got %x", data) + } + if val := rawdb.ReadStorageSnapshot(db, orphanStorageAccount, orphanStorageSlot); len(val) != 0 { + t.Errorf("orphan storage snapshot should be wiped, got %x", val) + } +} + +// TestSyncProgressV2RoundTrip verifies that the persisted blob is framed +// with the expected version byte at offset 0, and that all six status +// counters survive the round-trip. 
+func TestSyncProgressV2RoundTrip(t *testing.T) { + db := rawdb.NewMemoryDatabase() + + saver := newSyncerV2(db, rawdb.HashScheme) + saver.pivot = &types.Header{Number: new(big.Int).SetUint64(123), Difficulty: common.Big0} + saver.accountSynced = 1 + saver.accountBytes = 2 + saver.bytecodeSynced = 3 + saver.bytecodeBytes = 4 + saver.storageSynced = 5 + saver.storageBytes = 6 + saver.saveSyncStatus() + + raw := rawdb.ReadSnapshotSyncStatus(db) + if len(raw) == 0 || raw[0] != syncProgressVersion { + t.Fatalf("expected version byte %d at offset 0, got blob %x", syncProgressVersion, raw) + } + + loader := newSyncerV2(db, rawdb.HashScheme) + loader.loadSyncStatus() + for _, c := range []struct { + name string + got uint64 + want uint64 + }{ + {"accountSynced", loader.accountSynced, 1}, + {"accountBytes", uint64(loader.accountBytes), 2}, + {"bytecodeSynced", loader.bytecodeSynced, 3}, + {"bytecodeBytes", uint64(loader.bytecodeBytes), 4}, + {"storageSynced", loader.storageSynced, 5}, + {"storageBytes", uint64(loader.storageBytes), 6}, + } { + if c.got != c.want { + t.Errorf("%s mismatch: got %d, want %d", c.name, c.got, c.want) + } + } +} + +// TestSyncProgressCorruptPayload verifies that a persisted blob with the +// correct version byte but unparseable JSON body is discarded, triggers a +// fresh-start fall-through (not a panic or a stale-state load), and the +// orphan flat state is wiped along with the corrupt status. +func TestSyncProgressCorruptPayload(t *testing.T) { + db := rawdb.NewMemoryDatabase() + + // Version byte followed by garbage that isn't valid JSON. + rawdb.WriteSnapshotSyncStatus(db, []byte{syncProgressVersion, 0x7b, 0x7b, 0x7b}) + + // Pre-write orphan flat-state entries that should be wiped on fresh start. 
+ orphanAccountHash := common.HexToHash("0xdeadbeef") + rawdb.WriteAccountSnapshot(db, orphanAccountHash, []byte{0xde, 0xad}) + + syncer := newSyncerV2(db, rawdb.HashScheme) + syncer.loadSyncStatus() + + if syncer.previousPivot != nil { + t.Fatalf("expected previousPivot nil after corrupt payload, got %+v", syncer.previousPivot) + } + if len(syncer.tasks) != accountConcurrency { + t.Fatalf("expected fresh task split of %d, got %d", accountConcurrency, len(syncer.tasks)) + } + if data := rawdb.ReadAccountSnapshot(db, orphanAccountHash); len(data) != 0 { + t.Errorf("orphan account snapshot should be wiped, got %x", data) + } +} diff --git a/eth/protocols/snap/protocol.go b/eth/protocols/snap/protocol.go index 685f468da3..e14ca1283d 100644 --- a/eth/protocols/snap/protocol.go +++ b/eth/protocols/snap/protocol.go @@ -28,20 +28,22 @@ import ( // Constants to match up protocol versions and messages const ( SNAP1 = 1 - //SNAP2 = 2 + SNAP2 = 2 ) // ProtocolName is the official short name of the `snap` protocol used during // devp2p capability negotiation. const ProtocolName = "snap" -// ProtocolVersions are the supported versions of the `snap` protocol (first -// is primary). +// ProtocolVersions are the supported versions of the `snap` protocol advertised +// by default (first is primary). snap/2 is not safe to advertise unconditionally +// yet, so it is gated behind a feature flag and prepended in MakeProtocols rather +// than listed here. var ProtocolVersions = []uint{SNAP1} -// protocolLengths are the number of implemented message corresponding to -// different protocol versions. -var protocolLengths = map[uint]uint64{ /*SNAP2: 10,*/ SNAP1: 8} +// protocolLengths are the number of implemented messages corresponding to +// different protocol versions. snap/2 adds GetAccessLists/AccessLists (0x08/0x09). +var protocolLengths = map[uint]uint64{SNAP2: 10, SNAP1: 8} // maxMessageSize is the maximum cap on the size of a protocol message. 
const maxMessageSize = 10 * 1024 * 1024 diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 39534865c1..228fa27416 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -375,10 +375,10 @@ type healTask struct { codeTasks map[common.Hash]struct{} // Set of byte code tasks currently queued for retrieval, indexed by code hash } -// SyncProgress is a database entry to allow suspending and resuming a snapshot state +// syncProgress is a database entry to allow suspending and resuming a snapshot state // sync. Opposed to full and fast sync, there is no way to restart a suspended // snap sync without prior knowledge of the suspension point. -type SyncProgress struct { +type syncProgress struct { Tasks []*accountTask // The suspended account tasks (contract tasks within) // Status report during syncing phase @@ -396,9 +396,9 @@ type SyncProgress struct { BytecodeHealBytes common.StorageSize // Number of bytecodes persisted to disk } -// SyncPending is analogous to SyncProgress, but it's used to report on pending +// syncPending is analogous to syncProgress, but it's used to report on pending // ephemeral sync progress that doesn't get persisted into the database. -type SyncPending struct { +type syncPending struct { TrienodeHeal uint64 // Number of state trie nodes pending BytecodeHeal uint64 // Number of bytecodes pending } @@ -430,7 +430,7 @@ type SyncPeer interface { Log() log.Logger } -// Syncer is an Ethereum account and storage trie syncer based on snapshots and +// syncer is an Ethereum account and storage trie syncer based on snapshots and // the snap protocol. It's purpose is to download all the accounts and storage // slots from remote peers and reassemble chunks of the state trie, on top of // which a state sync can be run to fix any gaps / overlaps. 
@@ -441,7 +441,7 @@ type SyncPeer interface { // - The peer remains connected, but does not deliver a response in time // - The peer delivers a stale response after a previous timeout // - The peer delivers a refusal to serve the requested state -type Syncer struct { +type syncer struct { db ethdb.KeyValueStore // Database to store the trie nodes into (and dedup) scheme string // Node scheme used in node database @@ -473,7 +473,7 @@ type Syncer struct { storageSynced uint64 // Number of storage slots downloaded storageBytes common.StorageSize // Number of storage trie bytes persisted to disk - extProgress *SyncProgress // progress that can be exposed to external caller. + extProgress *syncProgress // progress that can be exposed to external caller. // Request tracking during healing phase trienodeHealIdlers map[string]struct{} // Peers that aren't serving trie node requests @@ -511,10 +511,10 @@ type Syncer struct { lock sync.RWMutex // Protects fields that can change outside of sync (peers, reqs, root) } -// NewSyncer creates a new snapshot syncer to download the Ethereum state over the -// snap protocol. -func NewSyncer(db ethdb.KeyValueStore, scheme string) *Syncer { - return &Syncer{ +// newSyncer creates the snap/1 state syncer. It is unexported; callers outside +// the package obtain a Syncer through NewV1Syncer. +func newSyncer(db ethdb.KeyValueStore, scheme string) *syncer { + return &syncer{ db: db, scheme: scheme, @@ -540,12 +540,12 @@ func NewSyncer(db ethdb.KeyValueStore, scheme string) *Syncer { trienodeHealThrottle: maxTrienodeHealThrottle, // Tune downward instead of insta-filling with junk stateWriter: db.NewBatch(), - extProgress: new(SyncProgress), + extProgress: new(syncProgress), } } // Register injects a new data source into the syncer's peerset. 
-func (s *Syncer) Register(peer SyncPeer) error { +func (s *syncer) Register(peer SyncPeer) error { // Make sure the peer is not registered yet id := peer.ID() @@ -573,7 +573,7 @@ func (s *Syncer) Register(peer SyncPeer) error { } // Unregister injects a new data source into the syncer's peerset. -func (s *Syncer) Unregister(id string) error { +func (s *syncer) Unregister(id string) error { // Remove all traces of the peer from the registry s.lock.Lock() if _, ok := s.peers[id]; !ok { @@ -604,7 +604,7 @@ func (s *Syncer) Unregister(id string) error { // with the given root and reconstruct the nodes based on the snapshot leaves. // Previously downloaded segments will not be redownloaded of fixed, rather any // errors will be healed after the leaves are fully accumulated. -func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error { +func (s *syncer) Sync(root common.Hash, cancel chan struct{}) error { // Move the trie root from any previous value, revert stateless markers for // any peers and initialize the syncer if it was not yet run s.lock.Lock() @@ -719,7 +719,7 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error { } // Update sync progress s.lock.Lock() - s.extProgress = &SyncProgress{ + s.extProgress = &syncProgress{ AccountSynced: s.accountSynced, AccountBytes: s.accountBytes, BytecodeSynced: s.bytecodeSynced, @@ -772,8 +772,8 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error { // loadSyncStatus retrieves a previously aborted sync status from the database, // or generates a fresh one if none is available. -func (s *Syncer) loadSyncStatus() { - var progress SyncProgress +func (s *syncer) loadSyncStatus() { + var progress syncProgress if status := rawdb.ReadSnapshotSyncStatus(s.db); status != nil { if err := json.Unmarshal(status, &progress); err != nil { @@ -891,7 +891,7 @@ func (s *Syncer) loadSyncStatus() { } // saveSyncStatus marshals the remaining sync tasks into leveldb. 
-func (s *Syncer) saveSyncStatus() { +func (s *syncer) saveSyncStatus() { // Serialize any partial progress to disk before spinning down for _, task := range s.tasks { // Claim the right boundary as incomplete before flushing the @@ -921,7 +921,7 @@ func (s *Syncer) saveSyncStatus() { } } // Store the actual progress markers - progress := &SyncProgress{ + progress := &syncProgress{ Tasks: s.tasks, AccountSynced: s.accountSynced, AccountBytes: s.accountBytes, @@ -942,10 +942,10 @@ func (s *Syncer) saveSyncStatus() { } // Progress returns the snap sync status statistics. -func (s *Syncer) Progress() (*SyncProgress, *SyncPending) { +func (s *syncer) Progress() (*syncProgress, *syncPending) { s.lock.Lock() defer s.lock.Unlock() - pending := new(SyncPending) + pending := new(syncPending) if s.healer != nil { pending.TrienodeHeal = uint64(len(s.healer.trieTasks)) pending.BytecodeHeal = uint64(len(s.healer.codeTasks)) @@ -955,7 +955,7 @@ func (s *Syncer) Progress() (*SyncProgress, *SyncPending) { // cleanAccountTasks removes account range retrieval tasks that have already been // completed. -func (s *Syncer) cleanAccountTasks() { +func (s *syncer) cleanAccountTasks() { // If the sync was already done before, don't even bother if len(s.tasks) == 0 { return @@ -980,7 +980,7 @@ func (s *Syncer) cleanAccountTasks() { // cleanStorageTasks iterates over all the account tasks and storage sub-tasks // within, cleaning any that have been completed. -func (s *Syncer) cleanStorageTasks() { +func (s *syncer) cleanStorageTasks() { for _, task := range s.tasks { for account, subtasks := range task.SubTasks { // Remove storage range retrieval tasks that completed @@ -1017,7 +1017,7 @@ func (s *Syncer) cleanStorageTasks() { // assignAccountTasks attempts to match idle peers to pending account range // retrievals. 
-func (s *Syncer) assignAccountTasks(success chan *accountResponse, fail chan *accountRequest, cancel chan struct{}) { +func (s *syncer) assignAccountTasks(success chan *accountResponse, fail chan *accountRequest, cancel chan struct{}) { s.lock.Lock() defer s.lock.Unlock() @@ -1114,7 +1114,7 @@ func (s *Syncer) assignAccountTasks(success chan *accountResponse, fail chan *ac } // assignBytecodeTasks attempts to match idle peers to pending code retrievals. -func (s *Syncer) assignBytecodeTasks(success chan *bytecodeResponse, fail chan *bytecodeRequest, cancel chan struct{}) { +func (s *syncer) assignBytecodeTasks(success chan *bytecodeResponse, fail chan *bytecodeRequest, cancel chan struct{}) { s.lock.Lock() defer s.lock.Unlock() @@ -1217,7 +1217,7 @@ func (s *Syncer) assignBytecodeTasks(success chan *bytecodeResponse, fail chan * // assignStorageTasks attempts to match idle peers to pending storage range // retrievals. -func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *storageRequest, cancel chan struct{}) { +func (s *syncer) assignStorageTasks(success chan *storageResponse, fail chan *storageRequest, cancel chan struct{}) { s.lock.Lock() defer s.lock.Unlock() @@ -1374,7 +1374,7 @@ func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *st // assignTrienodeHealTasks attempts to match idle peers to trie node requests to // heal any trie errors caused by the snap sync's chunked retrieval model. 
-func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fail chan *trienodeHealRequest, cancel chan struct{}) { +func (s *syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fail chan *trienodeHealRequest, cancel chan struct{}) { s.lock.Lock() defer s.lock.Unlock() @@ -1502,7 +1502,7 @@ func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fai // assignBytecodeHealTasks attempts to match idle peers to bytecode requests to // heal any trie errors caused by the snap sync's chunked retrieval model. -func (s *Syncer) assignBytecodeHealTasks(success chan *bytecodeHealResponse, fail chan *bytecodeHealRequest, cancel chan struct{}) { +func (s *syncer) assignBytecodeHealTasks(success chan *bytecodeHealResponse, fail chan *bytecodeHealRequest, cancel chan struct{}) { s.lock.Lock() defer s.lock.Unlock() @@ -1618,7 +1618,7 @@ func (s *Syncer) assignBytecodeHealTasks(success chan *bytecodeHealResponse, fai // revertRequests locates all the currently pending requests from a particular // peer and reverts them, rescheduling for others to fulfill. -func (s *Syncer) revertRequests(peer string) { +func (s *syncer) revertRequests(peer string) { // Gather the requests first, revertals need the lock too s.lock.Lock() var accountReqs []*accountRequest @@ -1673,7 +1673,7 @@ func (s *Syncer) revertRequests(peer string) { // scheduleRevertAccountRequest asks the event loop to clean up an account range // request and return all failed retrieval tasks to the scheduler for reassignment. -func (s *Syncer) scheduleRevertAccountRequest(req *accountRequest) { +func (s *syncer) scheduleRevertAccountRequest(req *accountRequest) { select { case req.revert <- req: // Sync event loop notified @@ -1689,7 +1689,7 @@ func (s *Syncer) scheduleRevertAccountRequest(req *accountRequest) { // // Note, this needs to run on the event runloop thread to reschedule to idle peers. // On peer threads, use scheduleRevertAccountRequest. 
-func (s *Syncer) revertAccountRequest(req *accountRequest) { +func (s *syncer) revertAccountRequest(req *accountRequest) { log.Debug("Reverting account request", "peer", req.peer, "reqid", req.id) select { case <-req.stale: @@ -1718,7 +1718,7 @@ func (s *Syncer) revertAccountRequest(req *accountRequest) { // scheduleRevertBytecodeRequest asks the event loop to clean up a bytecode request // and return all failed retrieval tasks to the scheduler for reassignment. -func (s *Syncer) scheduleRevertBytecodeRequest(req *bytecodeRequest) { +func (s *syncer) scheduleRevertBytecodeRequest(req *bytecodeRequest) { select { case req.revert <- req: // Sync event loop notified @@ -1734,7 +1734,7 @@ func (s *Syncer) scheduleRevertBytecodeRequest(req *bytecodeRequest) { // // Note, this needs to run on the event runloop thread to reschedule to idle peers. // On peer threads, use scheduleRevertBytecodeRequest. -func (s *Syncer) revertBytecodeRequest(req *bytecodeRequest) { +func (s *syncer) revertBytecodeRequest(req *bytecodeRequest) { log.Debug("Reverting bytecode request", "peer", req.peer) select { case <-req.stale: @@ -1763,7 +1763,7 @@ func (s *Syncer) revertBytecodeRequest(req *bytecodeRequest) { // scheduleRevertStorageRequest asks the event loop to clean up a storage range // request and return all failed retrieval tasks to the scheduler for reassignment. -func (s *Syncer) scheduleRevertStorageRequest(req *storageRequest) { +func (s *syncer) scheduleRevertStorageRequest(req *storageRequest) { select { case req.revert <- req: // Sync event loop notified @@ -1779,7 +1779,7 @@ func (s *Syncer) scheduleRevertStorageRequest(req *storageRequest) { // // Note, this needs to run on the event runloop thread to reschedule to idle peers. // On peer threads, use scheduleRevertStorageRequest. 
-func (s *Syncer) revertStorageRequest(req *storageRequest) { +func (s *syncer) revertStorageRequest(req *storageRequest) { log.Debug("Reverting storage request", "peer", req.peer) select { case <-req.stale: @@ -1812,7 +1812,7 @@ func (s *Syncer) revertStorageRequest(req *storageRequest) { // scheduleRevertTrienodeHealRequest asks the event loop to clean up a trienode heal // request and return all failed retrieval tasks to the scheduler for reassignment. -func (s *Syncer) scheduleRevertTrienodeHealRequest(req *trienodeHealRequest) { +func (s *syncer) scheduleRevertTrienodeHealRequest(req *trienodeHealRequest) { select { case req.revert <- req: // Sync event loop notified @@ -1828,7 +1828,7 @@ func (s *Syncer) scheduleRevertTrienodeHealRequest(req *trienodeHealRequest) { // // Note, this needs to run on the event runloop thread to reschedule to idle peers. // On peer threads, use scheduleRevertTrienodeHealRequest. -func (s *Syncer) revertTrienodeHealRequest(req *trienodeHealRequest) { +func (s *syncer) revertTrienodeHealRequest(req *trienodeHealRequest) { log.Debug("Reverting trienode heal request", "peer", req.peer) select { case <-req.stale: @@ -1857,7 +1857,7 @@ func (s *Syncer) revertTrienodeHealRequest(req *trienodeHealRequest) { // scheduleRevertBytecodeHealRequest asks the event loop to clean up a bytecode heal // request and return all failed retrieval tasks to the scheduler for reassignment. -func (s *Syncer) scheduleRevertBytecodeHealRequest(req *bytecodeHealRequest) { +func (s *syncer) scheduleRevertBytecodeHealRequest(req *bytecodeHealRequest) { select { case req.revert <- req: // Sync event loop notified @@ -1873,7 +1873,7 @@ func (s *Syncer) scheduleRevertBytecodeHealRequest(req *bytecodeHealRequest) { // // Note, this needs to run on the event runloop thread to reschedule to idle peers. // On peer threads, use scheduleRevertBytecodeHealRequest. 
-func (s *Syncer) revertBytecodeHealRequest(req *bytecodeHealRequest) { +func (s *syncer) revertBytecodeHealRequest(req *bytecodeHealRequest) { log.Debug("Reverting bytecode heal request", "peer", req.peer) select { case <-req.stale: @@ -1902,7 +1902,7 @@ func (s *Syncer) revertBytecodeHealRequest(req *bytecodeHealRequest) { // processAccountResponse integrates an already validated account range response // into the account tasks. -func (s *Syncer) processAccountResponse(res *accountResponse) { +func (s *syncer) processAccountResponse(res *accountResponse) { // Switch the task from pending to filling res.task.req = nil res.task.res = res @@ -2024,7 +2024,7 @@ func (s *Syncer) processAccountResponse(res *accountResponse) { // processBytecodeResponse integrates an already validated bytecode response // into the account tasks. -func (s *Syncer) processBytecodeResponse(res *bytecodeResponse) { +func (s *syncer) processBytecodeResponse(res *bytecodeResponse) { batch := s.db.NewBatch() var codes uint64 @@ -2068,7 +2068,7 @@ func (s *Syncer) processBytecodeResponse(res *bytecodeResponse) { // processStorageResponse integrates an already validated storage response // into the account tasks. -func (s *Syncer) processStorageResponse(res *storageResponse) { +func (s *syncer) processStorageResponse(res *storageResponse) { // Switch the subtask from pending to idle if res.subTask != nil { res.subTask.req = nil @@ -2308,7 +2308,7 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { // processTrienodeHealResponse integrates an already validated trienode response // into the healer tasks. 
-func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) { +func (s *syncer) processTrienodeHealResponse(res *trienodeHealResponse) { var ( start = time.Now() fills int @@ -2385,7 +2385,7 @@ func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) { } } -func (s *Syncer) commitHealer(force bool) { +func (s *syncer) commitHealer(force bool) { if !force && s.healer.scheduler.MemSize() < ethdb.IdealBatchSize { return } @@ -2401,7 +2401,7 @@ func (s *Syncer) commitHealer(force bool) { // processBytecodeHealResponse integrates an already validated bytecode response // into the healer tasks. -func (s *Syncer) processBytecodeHealResponse(res *bytecodeHealResponse) { +func (s *syncer) processBytecodeHealResponse(res *bytecodeHealResponse) { for i, hash := range res.hashes { node := res.codes[i] @@ -2431,7 +2431,7 @@ func (s *Syncer) processBytecodeHealResponse(res *bytecodeHealResponse) { // forwardAccountTask takes a filled account task and persists anything available // into the database, after which it forwards the next account marker so that the // task's next chunk may be filled. -func (s *Syncer) forwardAccountTask(task *accountTask) { +func (s *syncer) forwardAccountTask(task *accountTask) { // Remove any pending delivery res := task.res if res == nil { @@ -2521,7 +2521,7 @@ func (s *Syncer) forwardAccountTask(task *accountTask) { // OnAccounts is a callback method to invoke when a range of accounts are // received from a remote peer. 
-func (s *Syncer) OnAccounts(peer SyncPeer, id uint64, hashes []common.Hash, accounts [][]byte, proof [][]byte) error { +func (s *syncer) OnAccounts(peer SyncPeer, id uint64, hashes []common.Hash, accounts [][]byte, proof [][]byte) error { size := common.StorageSize(len(hashes) * common.HashLength) for _, account := range accounts { size += common.StorageSize(len(account)) @@ -2621,7 +2621,7 @@ func (s *Syncer) OnAccounts(peer SyncPeer, id uint64, hashes []common.Hash, acco // OnByteCodes is a callback method to invoke when a batch of contract // bytes codes are received from a remote peer. -func (s *Syncer) OnByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error { +func (s *syncer) OnByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error { s.lock.RLock() syncing := !s.snapped s.lock.RUnlock() @@ -2634,7 +2634,7 @@ func (s *Syncer) OnByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error // onByteCodes is a callback method to invoke when a batch of contract // bytes codes are received from a remote peer in the syncing phase. -func (s *Syncer) onByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error { +func (s *syncer) onByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error { var size common.StorageSize for _, code := range bytecodes { size += common.StorageSize(len(code)) @@ -2732,7 +2732,7 @@ func (s *Syncer) onByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error // OnStorage is a callback method to invoke when ranges of storage slots // are received from a remote peer. 
-func (s *Syncer) OnStorage(peer SyncPeer, id uint64, hashes [][]common.Hash, slots [][][]byte, proof [][]byte) error { +func (s *syncer) OnStorage(peer SyncPeer, id uint64, hashes [][]common.Hash, slots [][][]byte, proof [][]byte) error { // Gather some trace stats to aid in debugging issues var ( hashCount int @@ -2881,7 +2881,7 @@ func (s *Syncer) OnStorage(peer SyncPeer, id uint64, hashes [][]common.Hash, slo // OnTrieNodes is a callback method to invoke when a batch of trie nodes // are received from a remote peer. -func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error { +func (s *syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error { var size common.StorageSize for _, node := range trienodes { size += common.StorageSize(len(node)) @@ -2988,7 +2988,7 @@ func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error // onHealByteCodes is a callback method to invoke when a batch of contract // bytes codes are received from a remote peer in the healing phase. -func (s *Syncer) onHealByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error { +func (s *syncer) onHealByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error { var size common.StorageSize for _, code := range bytecodes { size += common.StorageSize(len(code)) @@ -3088,7 +3088,7 @@ func (s *Syncer) onHealByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) e // or storage slot) is downloaded during the healing stage. The flat states // can be persisted blindly and can be fixed later in the generation stage. // Note it's not concurrent safe, please handle the concurrent issue outside. 
-func (s *Syncer) onHealState(paths [][]byte, value []byte) error { +func (s *syncer) onHealState(paths [][]byte, value []byte) error { if len(paths) == 1 { var account types.StateAccount if err := rlp.DecodeBytes(value, &account); err != nil { @@ -3115,7 +3115,7 @@ func (s *Syncer) onHealState(paths [][]byte, value []byte) error { var hashSpace = new(big.Int).Exp(common.Big2, common.Big256, nil) // report calculates various status reports and provides it to the user. -func (s *Syncer) report(force bool) { +func (s *syncer) report(force bool) { if len(s.tasks) > 0 { s.reportSyncProgress(force) return @@ -3124,7 +3124,7 @@ func (s *Syncer) report(force bool) { } // reportSyncProgress calculates various status reports and provides it to the user. -func (s *Syncer) reportSyncProgress(force bool) { +func (s *syncer) reportSyncProgress(force bool) { // Don't report all the events, just occasionally if !force && time.Since(s.logTime) < 8*time.Second { return @@ -3170,7 +3170,7 @@ func (s *Syncer) reportSyncProgress(force bool) { } // reportHealProgress calculates various status reports and provides it to the user. 
-func (s *Syncer) reportHealProgress(force bool) { +func (s *syncer) reportHealProgress(force bool) { // Don't report all the events, just occasionally if !force && time.Since(s.logTime) < 8*time.Second { return diff --git a/eth/protocols/snap/sync_test.go b/eth/protocols/snap/sync_test.go index c506488e91..1afd9e1fb4 100644 --- a/eth/protocols/snap/sync_test.go +++ b/eth/protocols/snap/sync_test.go @@ -129,7 +129,7 @@ type ( type testPeer struct { id string test *testing.T - remote *Syncer + remote *syncer logger log.Logger accountTrie *trie.Trie accountValues []*kv @@ -623,9 +623,9 @@ func testSyncBloatedProof(t *testing.T, scheme string) { } } -func setupSyncer(scheme string, peers ...*testPeer) *Syncer { +func setupSyncer(scheme string, peers ...*testPeer) *syncer { stateDb := rawdb.NewMemoryDatabase() - syncer := NewSyncer(stateDb, scheme) + syncer := newSyncer(stateDb, scheme) for _, peer := range peers { syncer.Register(peer) peer.remote = syncer diff --git a/eth/protocols/snap/syncer.go b/eth/protocols/snap/syncer.go new file mode 100644 index 0000000000..0c3ea5caf2 --- /dev/null +++ b/eth/protocols/snap/syncer.go @@ -0,0 +1,147 @@ +// Copyright 2026 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+ +package snap + +import ( + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/rlp" +) + +// Progress is the set of snap-syncer progress counters that eth/downloader surfaces in +// ethereum.SyncProgress. The two syncer versions report it via different types +// (syncProgress / syncProgressV2). The adapters normalize both to this type. +type Progress struct { + AccountSynced uint64 + AccountBytes common.StorageSize + BytecodeSynced uint64 + BytecodeBytes common.StorageSize + StorageSynced uint64 + StorageBytes common.StorageSize + + // Healing-phase status. Reported by snap/1 only. + TrienodeHealSynced uint64 + TrienodeHealBytes common.StorageSize + BytecodeHealSynced uint64 + BytecodeHealBytes common.StorageSize + HealingTrienodes uint64 + HealingBytecode uint64 +} + +// Syncer is the uniform view over the snap/1 (*syncer) and snap/2 (*syncerV2) +// state syncers, consumed by eth/downloader. Peers are passed as SyncPeerV2, +// which is a superset of SyncPeer, so a single peer value works for both +// underlying syncers. +type Syncer interface { + Sync(pivot *types.Header, cancel chan struct{}) error + Progress() Progress + Register(peer SyncPeerV2) error + Unregister(id string) error + OnAccounts(peer SyncPeerV2, id uint64, hashes []common.Hash, accounts [][]byte, proof [][]byte) error + OnStorage(peer SyncPeerV2, id uint64, hashes [][]common.Hash, slots [][][]byte, proof [][]byte) error + OnByteCodes(peer SyncPeerV2, id uint64, bytecodes [][]byte) error + OnTrieNodes(peer SyncPeerV2, id uint64, trienodes [][]byte) error + OnAccessLists(peer SyncPeerV2, id uint64, lists rlp.RawList[rlp.RawValue]) error + + // Version is the snap protocol version this syncer implements. + Version() uint +} + +// NewV1Syncer returns a Syncer backed by the snap/1 state syncer.
+func NewV1Syncer(db ethdb.Database, scheme string) Syncer { + return syncerV1Adapter{newSyncer(db, scheme)} +} + +// NewV2Syncer returns a Syncer backed by the snap/2 state syncer. +func NewV2Syncer(db ethdb.Database, scheme string) Syncer { + return syncerV2Adapter{newSyncerV2(db, scheme)} +} + +// syncerV1Adapter adapts the snap/1 *syncer to Syncer. +type syncerV1Adapter struct{ *syncer } + +func (s syncerV1Adapter) Sync(pivot *types.Header, cancel chan struct{}) error { + return s.syncer.Sync(pivot.Root, cancel) +} + +func (s syncerV1Adapter) Progress() Progress { + progress, pending := s.syncer.Progress() + return Progress{ + AccountSynced: progress.AccountSynced, + AccountBytes: progress.AccountBytes, + BytecodeSynced: progress.BytecodeSynced, + BytecodeBytes: progress.BytecodeBytes, + StorageSynced: progress.StorageSynced, + StorageBytes: progress.StorageBytes, + TrienodeHealSynced: progress.TrienodeHealSynced, + TrienodeHealBytes: progress.TrienodeHealBytes, + BytecodeHealSynced: progress.BytecodeHealSynced, + BytecodeHealBytes: progress.BytecodeHealBytes, + HealingTrienodes: pending.TrienodeHeal, + HealingBytecode: pending.BytecodeHeal, + } +} + +// The snap/1 syncer's methods take SyncPeer. SyncPeerV2 is a superset, so the +// incoming peer satisfies them directly. Explicit forwarders are needed because +// the parameter types differ. 
+func (s syncerV1Adapter) Register(peer SyncPeerV2) error { return s.syncer.Register(peer) } +func (s syncerV1Adapter) OnAccounts(peer SyncPeerV2, id uint64, hashes []common.Hash, accounts [][]byte, proof [][]byte) error { + return s.syncer.OnAccounts(peer, id, hashes, accounts, proof) +} +func (s syncerV1Adapter) OnStorage(peer SyncPeerV2, id uint64, hashes [][]common.Hash, slots [][][]byte, proof [][]byte) error { + return s.syncer.OnStorage(peer, id, hashes, slots, proof) +} +func (s syncerV1Adapter) OnByteCodes(peer SyncPeerV2, id uint64, bytecodes [][]byte) error { + return s.syncer.OnByteCodes(peer, id, bytecodes) +} +func (s syncerV1Adapter) OnTrieNodes(peer SyncPeerV2, id uint64, trienodes [][]byte) error { + return s.syncer.OnTrieNodes(peer, id, trienodes) +} + +// OnAccessLists is a no-op for snap/1, which never requests BALs. +func (syncerV1Adapter) OnAccessLists(SyncPeerV2, uint64, rlp.RawList[rlp.RawValue]) error { + return nil +} + +// Version reports SNAP1, the protocol version the snap/1 syncer implements. +func (syncerV1Adapter) Version() uint { return SNAP1 } + +// syncerV2Adapter adapts the snap/2 *syncerV2 to Syncer. Its peer-facing methods +// already take SyncPeerV2 and its Sync already takes a header, so only Progress +// (different return type) and OnTrieNodes (absent) need wrapping. +type syncerV2Adapter struct{ *syncerV2 } + +func (s syncerV2Adapter) Progress() Progress { + progress := s.syncerV2.Progress() + return Progress{ + AccountSynced: progress.AccountSynced, + AccountBytes: progress.AccountBytes, + BytecodeSynced: progress.BytecodeSynced, + BytecodeBytes: progress.BytecodeBytes, + StorageSynced: progress.StorageSynced, + StorageBytes: progress.StorageBytes, + } +} + +// OnTrieNodes is a no-op for snap/2, which heals via BALs rather than trie nodes. +// Stale responses from snap/1 peers are silently ignored. +func (syncerV2Adapter) OnTrieNodes(SyncPeerV2, uint64, [][]byte) error { return nil } + +// Version is SNAP2; snap/2 needs SNAP2 peers to serve the BAL requests it issues.
+func (syncerV2Adapter) Version() uint { return SNAP2 } diff --git a/eth/protocols/snap/syncv2.go b/eth/protocols/snap/syncv2.go index 0bbcd9c35f..70f78e3ec8 100644 --- a/eth/protocols/snap/syncv2.go +++ b/eth/protocols/snap/syncv2.go @@ -21,6 +21,7 @@ import ( "encoding/json" "errors" "fmt" + "maps" "math/big" "math/rand" "sort" @@ -30,6 +31,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/core/types/bal" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/event" @@ -38,8 +40,34 @@ import ( "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" "github.com/ethereum/go-ethereum/trie/trienode" + "github.com/ethereum/go-ethereum/triedb" ) +const ( + // maxAccessListRequestCount is the maximum number of block BALs to + // request in a single query. BALs average ~72 KiB compressed (per EIP-7928), + // and EIP-8189 recommends a 2 MiB response soft limit, so we target ~28 + // blocks per request to avoid server-side truncation. + // + // NOTE: If the gas limit is raised significantly, this number may need to be adjusted + // to avoid server-side truncation and re-requesting. It is currently based on + // the assumption that the gas limit is 60M. + maxAccessListRequestCount = 28 + + // syncProgressVersion is the version byte prepended to the JSON-encoded + // syncProgressV2 when persisted. On load, a mismatching version byte causes + // the persisted progress to be discarded and sync to start fresh. + syncProgressVersion byte = 2 +) + +// minRequestSize, maxRequestSize, maxCodeRequestCount, accountConcurrency and +// storageConcurrency are shared with the snap/1 syncer; see sync.go. 
+ +// errAccessListPeersExhausted is returned from fetchAccessLists when every +// connected peer has been marked stateless for BAL requests and there +// are still hashes left to fetch. +var errAccessListPeersExhausted = errors.New("all peers exhausted for BAL requests") + // accountRequestV2 tracks a pending account range request to ensure responses are // to actual requests and to validate any security constraints. // @@ -154,6 +182,23 @@ type storageResponseV2 struct { cont bool // Whether the last storage range has a continuation } +type accessListRequest struct { + peer string // Peer to which this request is assigned + id uint64 // Request ID of this request + hashes []common.Hash // Block hashes corresponding to requested BALs + time time.Time // Timestamp when the request was sent + timeout *time.Timer // Timer to track the delivery timeout + deliver chan *accessListResponse // Channel to deliver successful response on + revert chan *accessListRequest // Channel to deliver request failure on + cancel chan struct{} // Channel to track sync cancellation + stale chan struct{} // Channel to signal the request was dropped +} + +type accessListResponse struct { + req *accessListRequest + accessLists []rlp.RawValue +} + // accountTaskV2 represents the sync task for a chunk of the account snapshot. type accountTaskV2 struct { // These fields get serialized to key-value store on shutdown @@ -216,11 +261,13 @@ type storageTaskV2 struct { done bool // Flag whether the task can be removed } -// SyncProgressV2 is a database entry to allow suspending and resuming a snapshot state +// syncProgressV2 is a database entry to allow suspending and resuming a snapshot state // sync. Opposed to full and fast sync, there is no way to restart a suspended // snap sync without prior knowledge of the suspension point. 
-type SyncProgressV2 struct { - Tasks []*accountTaskV2 // The suspended account tasks (contract tasks within) +type syncProgressV2 struct { + Pivot *types.Header // Pivot header being synced (for pivot move and reorg detection) + Tasks []*accountTaskV2 // The suspended account tasks (contract tasks within) + Complete bool // True once sync ran to completion for Pivot // Status report during syncing phase AccountSynced uint64 // Number of accounts downloaded @@ -250,15 +297,23 @@ type SyncPeerV2 interface { // RequestByteCodes fetches a batch of bytecodes by hash. RequestByteCodes(id uint64, hashes []common.Hash, bytes int) error + // RequestTrieNodes fetches a batch of account or storage trie nodes rooted in + // a specific state trie. snap/2 never issues these requests itself, but the + // method is retained so a single peer type can serve both the snap/1 and + // snap/2 syncers (e.g. via the downloader's syncer abstraction). + RequestTrieNodes(id uint64, root common.Hash, count int, paths []TrieNodePathSet, bytes int) error + + // RequestAccessLists fetches a batch of BALs by block hash. + RequestAccessLists(id uint64, hashes []common.Hash, bytes int) error + // Log retrieves the peer's own contextual logger. Log() log.Logger } -// SyncerV2 is an Ethereum account and storage state syncer based on the snap -// protocol. It's purpose is to download all the accounts and storage slots -// from remote peers, fixing the state inconsistencies between multiple sync -// targets with BALs(block level accessList) and ultimately reassemble the state -// trie (both account trie and storage tries) locally. +// syncerV2 is an Ethereum account and storage trie syncer based on the snap +// protocol. It downloads all accounts, storage slots, and bytecodes from +// remote peers as flat state, applies BAL diffs on pivot moves, +// and triggers a final trie rebuild once flat state is consistent. 
// // Every network request has a variety of failure events: // - The peer disconnects after task assignment, failing to send the request @@ -266,28 +321,35 @@ type SyncPeerV2 interface { // - The peer remains connected, but does not deliver a response in time // - The peer delivers a stale response after a previous timeout // - The peer delivers a refusal to serve the requested state -type SyncerV2 struct { - db ethdb.KeyValueStore // Database to store the trie nodes into (and dedup) - scheme string // Node scheme used in node database +type syncerV2 struct { + db ethdb.Database // Database to store the trie nodes into (and dedup) + scheme string // Node scheme used in node database - root common.Hash // Current state trie root being synced - tasks []*accountTaskV2 // Current account task set being synced - update chan struct{} // Notification channel for possible sync progression + pivot *types.Header // Current pivot header being synced (lock needed) + previousPivot *types.Header // Pivot from previous sync run (for pivot move detection) + complete bool // Whether the persisted progress was a completed sync + tasks []*accountTaskV2 // Current account task set being synced + update chan struct{} // Notification channel for possible sync progression peers map[string]SyncPeerV2 // Currently active peers to download from peerJoin *event.Feed // Event feed to react to peers joining peerDrop *event.Feed // Event feed to react to peers dropping rates *msgrate.Trackers // Message throughput rates for peers - // Request tracking during syncing phase - statelessPeers map[string]struct{} // Peers that failed to deliver state data - accountIdlers map[string]struct{} // Peers that aren't serving account requests - bytecodeIdlers map[string]struct{} // Peers that aren't serving bytecode requests - storageIdlers map[string]struct{} // Peers that aren't serving storage requests + // Request tracking during syncing phase. + // + // These fields should be protected by lock. 
+ statelessPeers map[string]struct{} // Peers that failed to deliver state data + accountIdlers map[string]struct{} // Peers that aren't serving account requests + bytecodeIdlers map[string]struct{} // Peers that aren't serving bytecode requests + storageIdlers map[string]struct{} // Peers that aren't serving storage requests + accessListIdlers map[string]struct{} // Peers that aren't serving BAL requests - accountReqs map[uint64]*accountRequestV2 // Account requests currently running - bytecodeReqs map[uint64]*bytecodeRequestV2 // Bytecode requests currently running - storageReqs map[uint64]*storageRequestV2 // Storage requests currently running + // These fields should be protected by lock. + accountReqs map[uint64]*accountRequestV2 // Account requests currently running + bytecodeReqs map[uint64]*bytecodeRequestV2 // Bytecode requests currently running + storageReqs map[uint64]*storageRequestV2 // Storage requests currently running + accessListReqs map[uint64]*accessListRequest // BAL requests currently running accountSynced uint64 // Number of accounts downloaded accountBytes common.StorageSize // Number of account trie bytes persisted to disk @@ -296,19 +358,19 @@ type SyncerV2 struct { storageSynced uint64 // Number of storage slots downloaded storageBytes common.StorageSize // Number of storage trie bytes persisted to disk - extProgress *SyncProgressV2 // progress that can be exposed to external caller. + extProgress *syncProgressV2 // progress that can be exposed to external caller. 
startTime time.Time // Time instance when snapshot sync started logTime time.Time // Time instance when status was last reported pend sync.WaitGroup // Tracks network request goroutines for graceful shutdown - lock sync.RWMutex // Protects fields that can change outside of sync (peers, reqs, root) + lock sync.RWMutex // Protects fields that can change outside of sync (peers, reqs, pivot) } -// NewSyncerV2 creates a new snapshot syncer to download the Ethereum state over the +// newSyncerV2 creates a new snapshot syncer to download the Ethereum state over the // snap protocol. -func NewSyncerV2(db ethdb.KeyValueStore, scheme string) *SyncerV2 { - return &SyncerV2{ +func newSyncerV2(db ethdb.Database, scheme string) *syncerV2 { + return &syncerV2{ db: db, scheme: scheme, @@ -318,20 +380,23 @@ func NewSyncerV2(db ethdb.KeyValueStore, scheme string) *SyncerV2 { rates: msgrate.NewTrackers(log.New("proto", "snap")), update: make(chan struct{}, 1), - accountIdlers: make(map[string]struct{}), - storageIdlers: make(map[string]struct{}), - bytecodeIdlers: make(map[string]struct{}), + statelessPeers: make(map[string]struct{}), + accountIdlers: make(map[string]struct{}), + storageIdlers: make(map[string]struct{}), + bytecodeIdlers: make(map[string]struct{}), + accessListIdlers: make(map[string]struct{}), - accountReqs: make(map[uint64]*accountRequestV2), - storageReqs: make(map[uint64]*storageRequestV2), - bytecodeReqs: make(map[uint64]*bytecodeRequestV2), + accountReqs: make(map[uint64]*accountRequestV2), + storageReqs: make(map[uint64]*storageRequestV2), + bytecodeReqs: make(map[uint64]*bytecodeRequestV2), + accessListReqs: make(map[uint64]*accessListRequest), - extProgress: new(SyncProgressV2), + extProgress: new(syncProgressV2), } } // Register injects a new data source into the syncer's peerset. 
-func (s *SyncerV2) Register(peer SyncPeerV2) error { +func (s *syncerV2) Register(peer SyncPeerV2) error { // Make sure the peer is not registered yet id := peer.ID() @@ -349,6 +414,7 @@ func (s *SyncerV2) Register(peer SyncPeerV2) error { s.accountIdlers[id] = struct{}{} s.storageIdlers[id] = struct{}{} s.bytecodeIdlers[id] = struct{}{} + s.accessListIdlers[id] = struct{}{} s.lock.Unlock() // Notify any active syncs that a new peer can be assigned data @@ -357,7 +423,7 @@ func (s *SyncerV2) Register(peer SyncPeerV2) error { } // Unregister injects a new data source into the syncer's peerset. -func (s *SyncerV2) Unregister(id string) error { +func (s *syncerV2) Unregister(id string) error { // Remove all traces of the peer from the registry s.lock.Lock() if _, ok := s.peers[id]; !ok { @@ -375,6 +441,7 @@ func (s *SyncerV2) Unregister(id string) error { delete(s.accountIdlers, id) delete(s.storageIdlers, id) delete(s.bytecodeIdlers, id) + delete(s.accessListIdlers, id) s.lock.Unlock() // Notify any active syncs that pending requests need to be reverted @@ -383,58 +450,124 @@ func (s *SyncerV2) Unregister(id string) error { } // Sync starts (or resumes a previous) sync cycle to iterate over a state trie -// with the given root and reconstruct the nodes based on the snapshot leaves. -// Previously downloaded segments will not be redownloaded of fixed, rather any -// errors will be healed after the leaves are fully accumulated. -func (s *SyncerV2) Sync(root common.Hash, cancel chan struct{}) error { - // Move the trie root from any previous value, revert stateless markers for - // any peers and initialize the syncer if it was not yet run +// with the given pivot header and reconstruct the nodes based on the snapshot +// leaves. 
+func (s *syncerV2) Sync(pivot *types.Header, cancel chan struct{}) error { + if pivot == nil { + return errors.New("snap sync: pivot header is nil") + } s.lock.Lock() - s.root = root + s.pivot = pivot + s.previousPivot = nil // loadSyncStatus overwrites when resuming from persisted progress s.statelessPeers = make(map[string]struct{}) s.lock.Unlock() - if s.startTime.IsZero() { s.startTime = time.Now() } - // Retrieve the previous sync status from LevelDB and abort if already synced + root := pivot.Root + + // Retrieve the previous sync status from DB. If there's no persisted + // status, sync is either fresh or already complete. s.loadSyncStatus() - if len(s.tasks) == 0 { - log.Debug("Snapshot sync already completed") + + // isPivotChanged is true when we have prior progress against a different + // pivot. That means we need to roll forward via catchUp, or wipe and + // restart if the prior pivot was reorged out. + isPivotChanged := s.previousPivot != nil && s.previousPivot.Hash() != s.pivot.Hash() + + // Skip if we've already finished syncing this pivot. + if !isPivotChanged && s.complete { + log.Info("Snap sync already complete for this pivot", "root", root) return nil } - defer func() { // Persist any progress, independent of failure - for _, task := range s.tasks { - s.forwardAccountTask(task) - } - s.cleanAccountTasks() - s.saveSyncStatus() - }() - log.Debug("Starting snapshot sync cycle", "root", root) - defer s.report(true) + // We're committing to running this sync. Clear the complete flag so a + // mid-run save (on cancel or error) doesn't persist a stale Complete=true + // status from a prior pivot. 
+ s.lock.Lock() + s.complete = false + s.lock.Unlock() - // Whether sync completed or not, disregard any future packets defer func() { + // Whether sync completed or not, disregard any future packets log.Debug("Terminating snapshot sync cycle", "root", root) s.lock.Lock() s.accountReqs = make(map[uint64]*accountRequestV2) s.storageReqs = make(map[uint64]*storageRequestV2) s.bytecodeReqs = make(map[uint64]*bytecodeRequestV2) + s.accessListReqs = make(map[uint64]*accessListRequest) s.lock.Unlock() + + // Persist final task state. + for _, task := range s.tasks { + s.forwardAccountTask(task) + } + s.cleanAccountTasks() + s.saveSyncStatus() + + // Log final progress. + s.report(true) }() - // Keep scheduling sync tasks + + log.Debug("Starting snapshot sync cycle", "root", root) + + // If we resumed against a different pivot, decide whether the persisted + // progress is still usable. If yes, roll forward via BAL catch-up. If not, + // wipe everything and restart fresh. + if isPivotChanged { + if isPivotReorged(s.db, s.previousPivot, s.pivot) { + log.Warn("Persisted progress unusable, restarting snap sync from scratch", + "number", s.previousPivot.Number, "oldHash", s.previousPivot.Hash()) + s.resetSyncState() + } else if err := s.catchUp(cancel); err != nil { + return err + } + } + + // Pin previousPivot to the current pivot before downloadState runs. + // This is what saveSyncStatus persists. If the download is interrupted + // and the next Sync gets a different pivot, this is how isPivotReorged + // recognizes the partial flat state belongs to the old pivot. Without + // it, isPivotReorged sees nil, skips the reorg branch, and downloadState + // would resume from the persisted task markers but mix the old pivot's + // already-downloaded accounts with the new pivot's data. 
+ s.lock.Lock() + s.previousPivot = s.pivot + s.lock.Unlock() + + log.Info("Starting state download", "root", root) + if err := s.downloadState(cancel); err != nil { + return err + } + log.Info("State download complete", "root", root) + + log.Info("Starting trie generation", "root", root) + if _, err := triedb.GenerateTrie(s.db, s.scheme, root, cancel); err != nil { + return err + } + log.Info("Trie generation complete", "root", root) + + // Mark sync complete. The deferred saveSyncStatus persists this with + // Complete=true so a follow-up Sync call for the same pivot can skip + // the work entirely. + s.lock.Lock() + s.complete = true + s.lock.Unlock() + return nil +} + +// downloadState runs the bulk flat-state download. It fetches +// account ranges, storage slots, and bytecodes, writing flat state to disk. +func (s *syncerV2) downloadState(cancel chan struct{}) error { + // Subscribe to peer events peerJoin := make(chan string, 16) peerJoinSub := s.peerJoin.Subscribe(peerJoin) defer peerJoinSub.Unsubscribe() - peerDrop := make(chan string, 16) peerDropSub := s.peerDrop.Subscribe(peerDrop) defer peerDropSub.Unsubscribe() - // Create a set of unique channels for this sync cycle.
We need these to be - // ephemeral so a data race doesn't accidentally deliver something stale on - // a persistent channel across syncs (yup, this happened) + // Create ephemeral channels for this download cycle var ( accountReqFails = make(chan *accountRequestV2) storageReqFails = make(chan *storageRequestV2) @@ -444,12 +577,13 @@ func (s *SyncerV2) Sync(root common.Hash, cancel chan struct{}) error { bytecodeResps = make(chan *bytecodeResponseV2) ) for { - // Remove all completed tasks and terminate sync if everything's done + // Remove all completed tasks and terminate if everything's done s.cleanStorageTasks() s.cleanAccountTasks() if len(s.tasks) == 0 { return nil } + // Assign all the data retrieval tasks to any free peers s.assignAccountTasks(accountResps, accountReqFails, cancel) s.assignBytecodeTasks(bytecodeResps, bytecodeReqFails, cancel) @@ -457,7 +591,7 @@ func (s *SyncerV2) Sync(root common.Hash, cancel chan struct{}) error { // Update sync progress s.lock.Lock() - s.extProgress = &SyncProgressV2{ + s.extProgress = &syncProgressV2{ AccountSynced: s.accountSynced, AccountBytes: s.accountBytes, BytecodeSynced: s.bytecodeSynced, @@ -473,7 +607,7 @@ func (s *SyncerV2) Sync(root common.Hash, cancel chan struct{}) error { case <-peerJoin: // A new peer joined, try to schedule it new tasks case id := <-peerDrop: - s.revertRequests(id) + s.revertStateRequests(id) case <-cancel: return ErrCancelled @@ -491,25 +625,325 @@ func (s *SyncerV2) Sync(root common.Hash, cancel chan struct{}) error { case res := <-storageResps: s.processStorageResponse(res) } + // Report stats if something meaningful happened s.report(false) } } -// loadSyncStatus retrieves a previously aborted sync status from the database, -// or generates a fresh one if none is available. -func (s *SyncerV2) loadSyncStatus() { - var progress SyncProgressV2 +// isPivotReorged reports whether the previous pivot is no longer usable +// as a starting point for forward catch-up. 
Either it was reorged out +// of the canonical chain, or the new pivot doesn't advance past it. +func isPivotReorged(db ethdb.Database, prev, curr *types.Header) bool { + // If the new pivot is at or below the old one, there's nothing for + // catchUp to roll forward. + if curr.Number.Cmp(prev.Number) <= 0 { + return true + } + // If there's no canonical hash at the old pivot's height, something + // is wrong. Headers up to the new pivot should already be indexed, + // so a missing entry at an earlier block means the chain state is + // broken. The most common cause is a chain rewind across the + // snap-synced pivot, which resets head to genesis and deletes + // canonical entries above it (see rewindPathHead in core/blockchain.go). + // Bail and let the fresh sync recover. + canonical := rawdb.ReadCanonicalHash(db, prev.Number.Uint64()) + if canonical == (common.Hash{}) { + return true + } - if status := rawdb.ReadSnapshotSyncStatus(s.db); status != nil { - if err := json.Unmarshal(status, &progress); err != nil { + // If canonical at the old pivot's height has a different hash, the + // old pivot was reorged out. + return canonical != prev.Hash() +} + +// catchUp runs the BAL catch-up. When the pivot has moved, it fetches BALs +// for the gap blocks, verifies them against block headers, and applies the +// diffs to roll flat state forward. +func (s *syncerV2) catchUp(cancel chan struct{}) error { + s.lock.RLock() + from := s.previousPivot.Number.Uint64() + 1 + to := s.pivot.Number.Uint64() + s.lock.RUnlock() + log.Info("Starting BAL catch-up", "from", from, "to", to, "blocks", to-from+1) + + // Collect block hashes and headers for the gap range. 
+ var ( + hashes = make([]common.Hash, 0, to-from+1) + headers = make(map[common.Hash]*types.Header, to-from+1) + ) + for num := from; num <= to; num++ { + hash := rawdb.ReadCanonicalHash(s.db, num) + if hash == (common.Hash{}) { + return fmt.Errorf("missing canonical hash for block %d during catch-up", num) + } + header := rawdb.ReadHeader(s.db, hash, num) + if header == nil { + return fmt.Errorf("missing header for block %d (hash %v) during catch-up", num, hash) + } + hashes = append(hashes, hash) + headers[hash] = header + } + + // Fetch BALs from peers + rawBALs, err := s.fetchAccessLists(hashes, headers, cancel) + if err != nil { + return err + } + + // Apply each BAL in block order. BALs are already verified by fetchAccessLists. + for i, raw := range rawBALs { + select { + case <-cancel: + return ErrCancelled + default: + } + num := from + uint64(i) + hash := hashes[i] + + // Decode the raw RLP into a BAL. + var ( + b bal.BlockAccessList + batch = s.db.NewBatch() + ) + if err := rlp.DecodeBytes(raw, &b); err != nil { + return fmt.Errorf("failed to decode BAL for block %d: %v", num, err) + } + + // applyAccessList failures are persistent. If a block's apply fails + // here, the next Sync will resume from this block and hit the same + // failure. Auto-recovery isn't implemented yet. + if err := s.applyAccessList(&b, batch); err != nil { + return fmt.Errorf("BAL application failed for block %d: %v", num, err) + } + + // Persist incremental progress so a crash mid-catchUp can resume + // from the next unapplied block. + s.lock.Lock() + s.previousPivot = headers[hash] + s.lock.Unlock() + s.saveSyncStatusWithDB(batch) + + // Commit the state transition alongside the sync progress atomically. + if err := batch.Write(); err != nil { + return err + } + } + log.Info("BAL catch-up complete", "blocks", len(rawBALs)) + return nil +} + +// fetchAccessLists fetches BALs for the given block hashes from +// remote peers. 
It runs its own event loop to assign requests +// to idle peers and process responses asynchronously. Each BAL is verified +// against its header before being accepted. Results are returned in the +// same order as the input hashes. +func (s *syncerV2) fetchAccessLists(hashes []common.Hash, headers map[common.Hash]*types.Header, cancel chan struct{}) ([]rlp.RawValue, error) { + log.Debug("Fetching BALs for catch-up", "blocks", len(hashes)) + + // Subscribe to peer events + peerJoin := make(chan string, 16) + peerJoinSub := s.peerJoin.Subscribe(peerJoin) + defer peerJoinSub.Unsubscribe() + peerDrop := make(chan string, 16) + peerDropSub := s.peerDrop.Subscribe(peerDrop) + defer peerDropSub.Unsubscribe() + + // pending = hashes not yet assigned to a peer, fetched = collected results. + pending := make(map[common.Hash]struct{}, len(hashes)) + for _, h := range hashes { + pending[h] = struct{}{} + } + fetched := make(map[common.Hash]rlp.RawValue, len(hashes)) + + var ( + accessListReqFails = make(chan *accessListRequest) + accessListResps = make(chan *accessListResponse) + lastStallLog = time.Now() + ) + for len(fetched) < len(hashes) { + // Assign BAL retrieval tasks to idle peers + s.assignAccessListTasks(pending, accessListResps, accessListReqFails, cancel) + + // If every peer is now stateless and nothing is in flight, no event + // short of cancel or a new peer joining can move us forward. Surface + // this so the caller can return and let a higher-level retry happen + // against a fresh peer set. + // + // TODO(rjl, jonny) add a time allowance before returning the error. + if s.accessListPeersExhausted() { + log.Warn("BAL peers exhausted, stopping catch-up early", "fetched", len(fetched), "remaining", len(pending)) + return nil, errAccessListPeersExhausted + } + + // Periodic visibility while stalled with peers connected but idle. 
+ if len(pending) > 0 && time.Since(lastStallLog) > 30*time.Second { + lastStallLog = time.Now() + log.Warn("BAL catch-up stalled, awaiting peers", "fetched", len(fetched), "remaining", len(pending)) + } + + // Wait for something to happen + select { + case <-s.update: + // Something happened (new peer, delivery, timeout), recheck + case <-peerJoin: + // A new peer joined, try to assign it work + case id := <-peerDrop: + s.revertBALRequests(id, pending) + case <-cancel: + return nil, ErrCancelled + case req := <-accessListReqFails: + s.revertAccessListRequest(req, pending) + case res := <-accessListResps: + s.processAccessListResponse(res, headers, pending, fetched) + } + } + // Assemble results in input order + results := make([]rlp.RawValue, len(hashes)) + for i, h := range hashes { + results[i] = fetched[h] + } + return results, nil +} + +// assignAccessListTasks attempts to assign BAL fetch requests to idle +// peers for any hashes still in pending. +func (s *syncerV2) assignAccessListTasks(pending map[common.Hash]struct{}, success chan *accessListResponse, fail chan *accessListRequest, cancel chan struct{}) { + s.lock.Lock() + defer s.lock.Unlock() + + // Iterate over pending hashes and assign to idle peers + idlers := s.sortIdlePeers(s.accessListIdlers, AccessListsMsg) + for len(idlers.ids) > 0 && len(pending) > 0 { + var ( + idle = idlers.ids[0] + peer = s.peers[idle] + cap = idlers.caps[0] + ) + idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:] + + // Generate a unique request ID + var reqid uint64 + for { + reqid = uint64(rand.Int63()) + if reqid == 0 { + continue + } + if _, ok := s.accessListReqs[reqid]; ok { + continue + } + break + } + + // Collect hashes to fetch, capped by peer capacity and the + // EIP-8189 2 MiB response soft limit (~72 KiB/BAL -> 28 blocks). 
+ if cap > maxAccessListRequestCount { + cap = maxAccessListRequestCount + } + batch := make([]common.Hash, 0, cap) + for h := range pending { + delete(pending, h) + + batch = append(batch, h) + if len(batch) >= cap { + break + } + } + req := &accessListRequest{ + peer: idle, + id: reqid, + hashes: batch, + time: time.Now(), + deliver: success, + revert: fail, + cancel: cancel, + stale: make(chan struct{}), + } + req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() { + peer.Log().Debug("BAL request timed out", "reqid", reqid) + s.rates.Update(idle, AccessListsMsg, 0, 0) + s.scheduleRevertAccessListRequest(req) + }) + s.accessListReqs[reqid] = req + delete(s.accessListIdlers, idle) + + s.pend.Add(1) + go func() { + defer s.pend.Done() + + // Attempt to send the remote request and revert if it fails + if err := peer.RequestAccessLists(reqid, batch, softResponseLimit); err != nil { + log.Debug("Failed to request BALs", "err", err) + s.scheduleRevertAccessListRequest(req) + } + }() + } +} + +// processAccessListResponse handles a successful BAL response. It +// verifies each non-empty BAL against the corresponding block header and +// stores the verified ones in fetched. +func (s *syncerV2) processAccessListResponse(res *accessListResponse, headers map[common.Hash]*types.Header, pending map[common.Hash]struct{}, fetched map[common.Hash]rlp.RawValue) { + var ( + stateless bool + valid = make(map[common.Hash]rlp.RawValue) + ) + // Each response entry corresponds to the requested hash at the same index. + for i, raw := range res.accessLists { + h := res.req.hashes[i] + + // Peer doesn't have this BAL. Add it back to pending for retry. 
+ if bytes.Equal(raw, rlp.EmptyString) { + continue + } + var b bal.BlockAccessList + if err := rlp.DecodeBytes(raw, &b); err != nil { + log.Warn("Peer sent unparseable BAL", "peer", res.req.peer, "block", h, "err", err) + stateless = true + continue + } + if err := verifyAccessList(&b, headers[h]); err != nil { + log.Warn("Peer sent invalid BAL", "peer", res.req.peer, "block", h, "err", err) + stateless = true + continue + } + valid[h] = raw + } + if stateless { + s.lock.Lock() + s.statelessPeers[res.req.peer] = struct{}{} + s.lock.Unlock() + } + // Re-add hashes that were not served back or invalid to pending + for i := 0; i < len(res.req.hashes); i++ { + if _, ok := valid[res.req.hashes[i]]; ok { + continue + } + pending[res.req.hashes[i]] = struct{}{} + } + maps.Copy(fetched, valid) +} + +// loadSyncStatus retrieves a previously aborted sync status from the database, +// or generates a fresh one if none is available. The persisted blob is framed +// as `[version byte | JSON payload]`; a missing or mismatching version byte +// causes the progress to be discarded and sync to start fresh. 
+func (s *syncerV2) loadSyncStatus() { + var progress syncProgressV2 + + if raw := rawdb.ReadSnapshotSyncStatus(s.db); len(raw) > 0 { + if raw[0] != syncProgressVersion { + log.Info("Discarding old-format sync progress", "version", raw[0], "expected", syncProgressVersion) + } else if err := json.Unmarshal(raw[1:], &progress); err != nil { log.Error("Failed to decode snap sync status", "err", err) } else { + s.lock.Lock() + defer s.lock.Unlock() + for _, task := range progress.Tasks { log.Debug("Scheduled account sync task", "from", task.Next, "last", task.Last) } s.tasks = progress.Tasks - for _, task := range s.tasks { // Restore the completed storages task.stateCompleted = make(map[common.Hash]struct{}) @@ -518,9 +952,8 @@ func (s *SyncerV2) loadSyncStatus() { } task.StorageCompleted = nil } - s.lock.Lock() - defer s.lock.Unlock() - + s.previousPivot = progress.Pivot + s.complete = progress.Complete s.accountSynced = progress.AccountSynced s.accountBytes = progress.AccountBytes s.bytecodeSynced = progress.BytecodeSynced @@ -531,9 +964,63 @@ func (s *SyncerV2) loadSyncStatus() { } } // Either we've failed to decode the previous state, or there was none. - // Start a fresh sync by chunking up the account range and scheduling - // them for retrieval. + s.resetSyncState() +} + +// increaseKey increments the input key by one, in place, treating it as a +// big-endian integer. It returns nil if the addition overflows. +func increaseKey(key []byte) []byte { + for i := len(key) - 1; i >= 0; i-- { + key[i]++ + if key[i] != 0x0 { + return key + } + } + return nil +} + +// deleteRange completely removes all database entries with the specific +// prefix. Note, this method assumes the space with the given prefix is exclusively +// occupied! +func deleteRange(batch ethdb.Batch, prefix []byte) { + start := prefix + limit := increaseKey(bytes.Clone(prefix)) + + // Try to remove the data in the range by a loop, as the leveldb + // doesn't support the native range deletion.
+ for { + err := batch.DeleteRange(start, limit) + if err == nil { + return + } + // An unclean shutdown may leave the on-disk state partially wiped and + // therefore inconsistent. This is a tradeoff of the current LevelDB-based + // approach. + if errors.Is(err, ethdb.ErrTooManyKeys) { + batch.Write() + batch.Reset() + continue + } + log.Crit("Failed to delete state entries", "err", err) + } +} + +// resetSyncState wipes all persisted snap-sync data (sync status, account +// and storage snapshots) and re-initializes in-memory state with a fresh +// chunking of the account hash range. +func (s *syncerV2) resetSyncState() { + batch := s.db.NewBatch() + rawdb.DeleteSnapshotSyncStatus(batch) + deleteRange(batch, rawdb.SnapshotAccountPrefix) + deleteRange(batch, rawdb.SnapshotStoragePrefix) + batch.Write() + + s.lock.Lock() + defer s.lock.Unlock() + s.tasks = nil + s.previousPivot = nil + s.complete = false s.accountSynced, s.accountBytes = 0, 0 s.bytecodeSynced, s.bytecodeBytes = 0, 0 s.storageSynced, s.storageBytes = 0, 0 @@ -562,8 +1049,13 @@ func (s *SyncerV2) loadSyncStatus() { } } -// saveSyncStatus marshals the remaining sync tasks into leveldb. -func (s *SyncerV2) saveSyncStatus() { +// saveSyncStatus marshals the remaining sync tasks into db. +func (s *syncerV2) saveSyncStatus() { + s.saveSyncStatusWithDB(s.db) +} + +// saveSyncStatusWithDB marshals the remaining sync tasks into the given database. +func (s *syncerV2) saveSyncStatusWithDB(db ethdb.KeyValueWriter) { // Serialize any partial progress to disk before spinning down for _, task := range s.tasks { // Save the account hashes of completed storage. @@ -575,9 +1067,11 @@ func (s *SyncerV2) saveSyncStatus() { log.Debug("Leftover completed storages", "number", len(task.StorageCompleted), "next", task.Next, "last", task.Last) } } - // Store the actual progress markers - progress := &SyncProgressV2{ + // Store the actual progress markers. 
+ progress := &syncProgressV2{ + Pivot: s.previousPivot, Tasks: s.tasks, + Complete: s.complete, AccountSynced: s.accountSynced, AccountBytes: s.accountBytes, BytecodeSynced: s.bytecodeSynced, @@ -585,24 +1079,25 @@ func (s *SyncerV2) saveSyncStatus() { StorageSynced: s.storageSynced, StorageBytes: s.storageBytes, } - status, err := json.Marshal(progress) + blob, err := json.Marshal(progress) if err != nil { panic(err) // This can only fail during implementation } - rawdb.WriteSnapshotSyncStatus(s.db, status) + // Prepend the version byte so future format changes can be detected on load. + status := append([]byte{syncProgressVersion}, blob...) + rawdb.WriteSnapshotSyncStatus(db, status) } // Progress returns the snap sync status statistics. -func (s *SyncerV2) Progress() *SyncProgressV2 { +func (s *syncerV2) Progress() *syncProgressV2 { s.lock.Lock() defer s.lock.Unlock() - return s.extProgress } // cleanAccountTasks removes account range retrieval tasks that have already been // completed. -func (s *SyncerV2) cleanAccountTasks() { +func (s *syncerV2) cleanAccountTasks() { // If the sync was already done before, don't even bother if len(s.tasks) == 0 { return @@ -614,16 +1109,15 @@ func (s *SyncerV2) cleanAccountTasks() { i-- } } - // If everything was just finalized just, generate the account trie and start heal + // If everything was just finalized, push the final sync report if len(s.tasks) == 0 { - // Push the final sync report s.reportSyncProgressV2(true) } } // cleanStorageTasks iterates over all the account tasks and storage sub-tasks // within, cleaning any that have been completed. 
-func (s *SyncerV2) cleanStorageTasks() { +func (s *syncerV2) cleanStorageTasks() { for _, task := range s.tasks { for account, subtasks := range task.SubTasks { // Remove storage range retrieval tasks that completed @@ -646,7 +1140,8 @@ func (s *SyncerV2) cleanStorageTasks() { delete(task.SubTasks, account) task.pend-- - // Mark the state as complete to prevent resyncing + // Mark the state as complete to prevent resyncing, regardless + // if state healing is necessary. task.stateCompleted[account] = struct{}{} // If this was the last pending task, forward the account task @@ -659,28 +1154,15 @@ func (s *SyncerV2) cleanStorageTasks() { // assignAccountTasks attempts to match idle peers to pending account range // retrievals. -func (s *SyncerV2) assignAccountTasks(success chan *accountResponseV2, fail chan *accountRequestV2, cancel chan struct{}) { +func (s *syncerV2) assignAccountTasks(success chan *accountResponseV2, fail chan *accountRequestV2, cancel chan struct{}) { s.lock.Lock() defer s.lock.Unlock() // Sort the peers by download capacity to use faster ones if many available - idlers := &capacitySort{ - ids: make([]string, 0, len(s.accountIdlers)), - caps: make([]int, 0, len(s.accountIdlers)), - } - targetTTL := s.rates.TargetTimeout() - for id := range s.accountIdlers { - if _, ok := s.statelessPeers[id]; ok { - continue - } - idlers.ids = append(idlers.ids, id) - idlers.caps = append(idlers.caps, s.rates.Capacity(id, AccountRangeMsg, targetTTL)) - } + idlers := s.sortIdlePeers(s.accountIdlers, AccountRangeMsg) if len(idlers.ids) == 0 { return } - sort.Sort(sort.Reverse(idlers)) - // Iterate over all the tasks and try to find a pending one for _, task := range s.tasks { // Skip any tasks already filling @@ -748,7 +1230,7 @@ func (s *SyncerV2) assignAccountTasks(success chan *accountResponseV2, fail chan peer.Log().Debug("Failed to request account range", "err", err) s.scheduleRevertAccountRequest(req) } - }(s.root) + }(s.pivot.Root) // Inject the request 
into the task to block further assignments task.req = req @@ -756,28 +1238,14 @@ func (s *SyncerV2) assignAccountTasks(success chan *accountResponseV2, fail chan } // assignBytecodeTasks attempts to match idle peers to pending code retrievals. -func (s *SyncerV2) assignBytecodeTasks(success chan *bytecodeResponseV2, fail chan *bytecodeRequestV2, cancel chan struct{}) { +func (s *syncerV2) assignBytecodeTasks(success chan *bytecodeResponseV2, fail chan *bytecodeRequestV2, cancel chan struct{}) { s.lock.Lock() defer s.lock.Unlock() - // Sort the peers by download capacity to use faster ones if many available - idlers := &capacitySort{ - ids: make([]string, 0, len(s.bytecodeIdlers)), - caps: make([]int, 0, len(s.bytecodeIdlers)), - } - targetTTL := s.rates.TargetTimeout() - for id := range s.bytecodeIdlers { - if _, ok := s.statelessPeers[id]; ok { - continue - } - idlers.ids = append(idlers.ids, id) - idlers.caps = append(idlers.caps, s.rates.Capacity(id, ByteCodesMsg, targetTTL)) - } + idlers := s.sortIdlePeers(s.bytecodeIdlers, ByteCodesMsg) if len(idlers.ids) == 0 { return } - sort.Sort(sort.Reverse(idlers)) - // Iterate over all the tasks and try to find a pending one for _, task := range s.tasks { // Skip any tasks not in the bytecode retrieval phase @@ -859,28 +1327,14 @@ func (s *SyncerV2) assignBytecodeTasks(success chan *bytecodeResponseV2, fail ch // assignStorageTasks attempts to match idle peers to pending storage range // retrievals. 
-func (s *SyncerV2) assignStorageTasks(success chan *storageResponseV2, fail chan *storageRequestV2, cancel chan struct{}) { +func (s *syncerV2) assignStorageTasks(success chan *storageResponseV2, fail chan *storageRequestV2, cancel chan struct{}) { s.lock.Lock() defer s.lock.Unlock() - // Sort the peers by download capacity to use faster ones if many available - idlers := &capacitySort{ - ids: make([]string, 0, len(s.storageIdlers)), - caps: make([]int, 0, len(s.storageIdlers)), - } - targetTTL := s.rates.TargetTimeout() - for id := range s.storageIdlers { - if _, ok := s.statelessPeers[id]; ok { - continue - } - idlers.ids = append(idlers.ids, id) - idlers.caps = append(idlers.caps, s.rates.Capacity(id, StorageRangesMsg, targetTTL)) - } + idlers := s.sortIdlePeers(s.storageIdlers, StorageRangesMsg) if len(idlers.ids) == 0 { return } - sort.Sort(sort.Reverse(idlers)) - // Iterate over all the tasks and try to find a pending one for _, task := range s.tasks { // Skip any tasks not in the storage retrieval phase @@ -1005,7 +1459,7 @@ func (s *SyncerV2) assignStorageTasks(success chan *storageResponseV2, fail chan log.Debug("Failed to request storage", "err", err) s.scheduleRevertStorageRequest(req) } - }(s.root) + }(s.pivot.Root) // Inject the request into the subtask to block further assignments if subtask != nil { @@ -1014,9 +1468,9 @@ func (s *SyncerV2) assignStorageTasks(success chan *storageResponseV2, fail chan } } -// revertRequests locates all the currently pending requests from a particular -// peer and reverts them, rescheduling for others to fulfill. -func (s *SyncerV2) revertRequests(peer string) { +// revertStateRequests locates all the currently pending state requests from a +// particular peer and reverts them, rescheduling for others to fulfill. 
+func (s *syncerV2) revertStateRequests(peer string) { // Gather the requests first, revertals need the lock too s.lock.Lock() var accountReqs []*accountRequestV2 @@ -1051,9 +1505,28 @@ func (s *SyncerV2) revertRequests(peer string) { } } +// revertBALRequests locates all the currently pending bal requests from a +// particular peer and reverts them, rescheduling for others to fulfill. +func (s *syncerV2) revertBALRequests(peer string, pending map[common.Hash]struct{}) { + // Gather the requests first, revertals need the lock too + s.lock.Lock() + var accessListReqs []*accessListRequest + for _, req := range s.accessListReqs { + if req.peer == peer { + accessListReqs = append(accessListReqs, req) + } + } + s.lock.Unlock() + + // Revert all the requests matching the peer + for _, req := range accessListReqs { + s.revertAccessListRequest(req, pending) + } +} + // scheduleRevertAccountRequest asks the event loop to clean up an account range // request and return all failed retrieval tasks to the scheduler for reassignment. -func (s *SyncerV2) scheduleRevertAccountRequest(req *accountRequestV2) { +func (s *syncerV2) scheduleRevertAccountRequest(req *accountRequestV2) { select { case req.revert <- req: // Sync event loop notified @@ -1069,7 +1542,7 @@ func (s *SyncerV2) scheduleRevertAccountRequest(req *accountRequestV2) { // // Note, this needs to run on the event runloop thread to reschedule to idle peers. // On peer threads, use scheduleRevertAccountRequest. -func (s *SyncerV2) revertAccountRequest(req *accountRequestV2) { +func (s *syncerV2) revertAccountRequest(req *accountRequestV2) { log.Debug("Reverting account request", "peer", req.peer, "reqid", req.id) select { case <-req.stale: @@ -1098,7 +1571,7 @@ func (s *SyncerV2) revertAccountRequest(req *accountRequestV2) { // scheduleRevertBytecodeRequest asks the event loop to clean up a bytecode request // and return all failed retrieval tasks to the scheduler for reassignment. 
-func (s *SyncerV2) scheduleRevertBytecodeRequest(req *bytecodeRequestV2) { +func (s *syncerV2) scheduleRevertBytecodeRequest(req *bytecodeRequestV2) { select { case req.revert <- req: // Sync event loop notified @@ -1114,7 +1587,7 @@ func (s *SyncerV2) scheduleRevertBytecodeRequest(req *bytecodeRequestV2) { // // Note, this needs to run on the event runloop thread to reschedule to idle peers. // On peer threads, use scheduleRevertBytecodeRequest. -func (s *SyncerV2) revertBytecodeRequest(req *bytecodeRequestV2) { +func (s *syncerV2) revertBytecodeRequest(req *bytecodeRequestV2) { log.Debug("Reverting bytecode request", "peer", req.peer) select { case <-req.stale: @@ -1143,7 +1616,7 @@ func (s *SyncerV2) revertBytecodeRequest(req *bytecodeRequestV2) { // scheduleRevertStorageRequest asks the event loop to clean up a storage range // request and return all failed retrieval tasks to the scheduler for reassignment. -func (s *SyncerV2) scheduleRevertStorageRequest(req *storageRequestV2) { +func (s *syncerV2) scheduleRevertStorageRequest(req *storageRequestV2) { select { case req.revert <- req: // Sync event loop notified @@ -1159,7 +1632,7 @@ func (s *SyncerV2) scheduleRevertStorageRequest(req *storageRequestV2) { // // Note, this needs to run on the event runloop thread to reschedule to idle peers. // On peer threads, use scheduleRevertStorageRequest. -func (s *SyncerV2) revertStorageRequest(req *storageRequestV2) { +func (s *syncerV2) revertStorageRequest(req *storageRequestV2) { log.Debug("Reverting storage request", "peer", req.peer) select { case <-req.stale: @@ -1190,9 +1663,52 @@ func (s *SyncerV2) revertStorageRequest(req *storageRequestV2) { } } +// scheduleRevertAccessListRequest asks the event loop to clean up an access +// list request and return all failed retrieval tasks for reassignment. +// +// Note, this is the entry point for peer threads and timers; the revert itself +// runs on the event loop via revertAccessListRequest.
+func (s *syncerV2) scheduleRevertAccessListRequest(req *accessListRequest) { + select { + case req.revert <- req: + // Sync event loop notified + case <-req.cancel: + // Sync cycle got cancelled + case <-req.stale: + // Request already reverted + } +} + +// revertAccessListRequest cleans up a BAL request and returns all +// failed retrieval tasks to the scheduler for reassignment. +func (s *syncerV2) revertAccessListRequest(req *accessListRequest, pending map[common.Hash]struct{}) { + log.Debug("Reverting BAL request", "peer", req.peer) + select { + case <-req.stale: + log.Trace("BAL request already reverted", "peer", req.peer, "reqid", req.id) + return + default: + } + close(req.stale) + + // Remove the request from the tracked set and restore the peer to the + // idle pool so it can be reassigned work (skip if peer already left). + s.lock.Lock() + delete(s.accessListReqs, req.id) + if _, ok := s.peers[req.peer]; ok { + s.accessListIdlers[req.peer] = struct{}{} + } + s.lock.Unlock() + + req.timeout.Stop() + for _, h := range req.hashes { + pending[h] = struct{}{} + } +} + // processAccountResponse integrates an already validated account range response // into the account tasks. -func (s *SyncerV2) processAccountResponse(res *accountResponseV2) { +func (s *syncerV2) processAccountResponse(res *accountResponseV2) { // Switch the task from pending to filling res.task.req = nil res.task.res = res @@ -1303,7 +1819,7 @@ func (s *SyncerV2) processAccountResponse(res *accountResponseV2) { // processBytecodeResponse integrates an already validated bytecode response // into the account tasks. -func (s *SyncerV2) processBytecodeResponse(res *bytecodeResponseV2) { +func (s *syncerV2) processBytecodeResponse(res *bytecodeResponseV2) { batch := s.db.NewBatch() var codes uint64 @@ -1347,7 +1863,7 @@ func (s *SyncerV2) processBytecodeResponse(res *bytecodeResponseV2) { // processStorageResponse integrates an already validated storage response // into the account tasks.
-func (s *SyncerV2) processStorageResponse(res *storageResponseV2) { +func (s *syncerV2) processStorageResponse(res *storageResponseV2) { // Switch the subtask from pending to idle if res.subTask != nil { res.subTask.req = nil @@ -1421,6 +1937,7 @@ func (s *SyncerV2) processStorageResponse(res *storageResponseV2) { } else { largeStorageGauge.Inc(1) } + // Our first task is the one that was just filled by this response. tasks = append(tasks, &storageTaskV2{ Next: common.Hash{}, Last: r.End(), @@ -1501,7 +2018,7 @@ func (s *SyncerV2) processStorageResponse(res *storageResponseV2) { // forwardAccountTask takes a filled account task and persists anything available // into the database, after which it forwards the next account marker so that the // task's next chunk may be filled. -func (s *SyncerV2) forwardAccountTask(task *accountTaskV2) { +func (s *syncerV2) forwardAccountTask(task *accountTaskV2) { // Remove any pending delivery res := task.res if res == nil { @@ -1511,7 +2028,7 @@ func (s *SyncerV2) forwardAccountTask(task *accountTaskV2) { // Persist the received account segments. These flat state maybe // outdated during the sync, but it can be fixed later during the - // snapshot generation. + // trie rebuild. oldAccountBytes := s.accountBytes batch := ethdb.HookedBatch{ @@ -1533,7 +2050,7 @@ func (s *SyncerV2) forwardAccountTask(task *accountTaskV2) { } s.accountSynced += uint64(len(res.accounts)) - // Task filling persisted, push it the chunk marker forward to the first + // Task filling persisted, push the chunk marker forward to the first // account still missing data. for i, hash := range res.hashes { if task.needCode[i] || task.needState[i] { @@ -1558,7 +2075,7 @@ func (s *SyncerV2) forwardAccountTask(task *accountTaskV2) { // OnAccounts is a callback method to invoke when a range of accounts are // received from a remote peer. 
-func (s *SyncerV2) OnAccounts(peer SyncPeerV2, id uint64, hashes []common.Hash, accounts [][]byte, proof [][]byte) error { +func (s *syncerV2) OnAccounts(peer SyncPeerV2, id uint64, hashes []common.Hash, accounts [][]byte, proof [][]byte) error { size := common.StorageSize(len(hashes) * common.HashLength) for _, account := range accounts { size += common.StorageSize(len(account)) @@ -1607,7 +2124,7 @@ func (s *SyncerV2) OnAccounts(peer SyncPeerV2, id uint64, hashes []common.Hash, // retrieved was either already pruned remotely, or the peer is not yet // synced to our head. if len(hashes) == 0 && len(accounts) == 0 && len(proof) == 0 { - logger.Debug("Peer rejected account range request", "root", s.root) + logger.Debug("Peer rejected account range request", "root", s.pivot.Root) s.statelessPeers[peer.ID()] = struct{}{} s.lock.Unlock() @@ -1615,7 +2132,7 @@ func (s *SyncerV2) OnAccounts(peer SyncPeerV2, id uint64, hashes []common.Hash, s.scheduleRevertAccountRequest(req) return nil } - root := s.root + root := s.pivot.Root s.lock.Unlock() // Reconstruct a partial trie from the response and verify it @@ -1658,7 +2175,7 @@ func (s *SyncerV2) OnAccounts(peer SyncPeerV2, id uint64, hashes []common.Hash, // OnByteCodes is a callback method to invoke when a batch of contract // bytes codes are received from a remote peer in the syncing phase. -func (s *SyncerV2) OnByteCodes(peer SyncPeerV2, id uint64, bytecodes [][]byte) error { +func (s *syncerV2) OnByteCodes(peer SyncPeerV2, id uint64, bytecodes [][]byte) error { var size common.StorageSize for _, code := range bytecodes { size += common.StorageSize(len(code)) @@ -1756,7 +2273,7 @@ func (s *SyncerV2) OnByteCodes(peer SyncPeerV2, id uint64, bytecodes [][]byte) e // OnStorage is a callback method to invoke when ranges of storage slots // are received from a remote peer. 
-func (s *SyncerV2) OnStorage(peer SyncPeerV2, id uint64, hashes [][]common.Hash, slots [][][]byte, proof [][]byte) error { +func (s *syncerV2) OnStorage(peer SyncPeerV2, id uint64, hashes [][]common.Hash, slots [][][]byte, proof [][]byte) error { // Gather some trace stats to aid in debugging issues var ( hashCount int @@ -1903,16 +2420,96 @@ func (s *SyncerV2) OnStorage(peer SyncPeerV2, id uint64, hashes [][]common.Hash, return nil } -// report calculates various status reports and provides it to the user. -func (s *SyncerV2) report(force bool) { - if len(s.tasks) > 0 { - s.reportSyncProgressV2(force) - return +// OnAccessLists is a callback method to invoke when a batch of BALs +// are received from a remote peer. +func (s *syncerV2) OnAccessLists(peer SyncPeerV2, id uint64, accessLists rlp.RawList[rlp.RawValue]) error { + // Convert RawList to slice of raw values + bals, err := accessLists.Items() + if err != nil { + return err } + + // Calculate total size of returned data + var size common.StorageSize + for _, bal := range bals { + size += common.StorageSize(len(bal)) + } + logger := peer.Log().New("reqid", id) + logger.Trace("Delivering set of BALs", "count", len(bals), "bytes", size) + + // Whether or not the response is valid, we can mark the peer as idle and + // notify the scheduler to assign a new task. If the response is invalid, + // we'll drop the peer in a bit. 
+ defer func() { + s.lock.Lock() + defer s.lock.Unlock() + if _, ok := s.peers[peer.ID()]; ok { + s.accessListIdlers[peer.ID()] = struct{}{} + } + select { + case s.update <- struct{}{}: + default: + } + }() + s.lock.Lock() + + // Ensure the response is for a valid request + req, ok := s.accessListReqs[id] + if !ok { + // Request stale, perhaps the peer timed out but came through in the end + logger.Warn("Unexpected BAL packet") + s.lock.Unlock() + return nil + } + delete(s.accessListReqs, id) + s.rates.Update(peer.ID(), AccessListsMsg, time.Since(req.time), len(bals)) + + // Clean up the request timeout timer + if !req.timeout.Stop() { + // The timeout is already triggered, and this request will be reverted+rescheduled + s.lock.Unlock() + return nil + } + + // Response is valid, but check if peer is signalling that it does not have + // the requested data. + if len(bals) == 0 { + logger.Debug("Peer rejected BAL request") + s.statelessPeers[peer.ID()] = struct{}{} + s.lock.Unlock() + + // Signal this request as failed, and ready for rescheduling + s.scheduleRevertAccessListRequest(req) + return nil + } + if len(bals) > len(req.hashes) { + s.lock.Unlock() + s.scheduleRevertAccessListRequest(req) + logger.Warn("Peer sent more BALs than requested", "count", len(bals), "requested", len(req.hashes)) + return errors.New("more BALs than requested") + } + s.lock.Unlock() + + // Response validated, send it to the scheduler for filling. + response := &accessListResponse{ + req: req, + accessLists: bals, + } + select { + case req.deliver <- response: + case <-req.cancel: + case <-req.stale: + } + return nil +} + +// report calculates various status reports and provides it to the user. +func (s *syncerV2) report(force bool) { + s.reportSyncProgressV2(force) } // reportSyncProgressV2 calculates various status reports and provides it to the user. 
-func (s *SyncerV2) reportSyncProgressV2(force bool) { +func (s *syncerV2) reportSyncProgressV2(force bool) { // Don't report all the events, just occasionally if !force && time.Since(s.logTime) < 8*time.Second { return @@ -1935,7 +2532,6 @@ func (s *SyncerV2) reportSyncProgressV2(force bool) { new(big.Int).Mul(new(big.Int).SetUint64(uint64(synced)), hashSpace), accountFills, ).Uint64()) - // Don't report anything until we have a meaningful progress if estBytes < 1.0 { return @@ -1957,3 +2553,46 @@ func (s *SyncerV2) reportSyncProgressV2(force bool) { log.Info("Syncing: state download in progress", "synced", progress, "state", synced, "accounts", accounts, "slots", storage, "codes", bytecode, "eta", common.PrettyDuration(estTime-elapsed)) } + +// accessListPeersExhausted reports whether forward progress on BAL fetches is +// impossible: at least one peer is connected, every connected peer is marked +// stateless, and no BAL requests are in flight. +func (s *syncerV2) accessListPeersExhausted() bool { + s.lock.RLock() + defer s.lock.RUnlock() + + if len(s.peers) == 0 { + return false + } + if len(s.accessListReqs) > 0 { + return false + } + for id := range s.peers { + if _, ok := s.statelessPeers[id]; !ok { + return false + } + } + return true +} + +// sortIdlePeers builds a list of idle peers sorted by download capacity +// (highest first), filtering out stateless peers. Must be called with s.lock held. 
+func (s *syncerV2) sortIdlePeers(idlerSet map[string]struct{}, msgCode uint64) *capacitySort { + idlers := &capacitySort{ + ids: make([]string, 0, len(idlerSet)), + caps: make([]int, 0, len(idlerSet)), + } + targetTTL := s.rates.TargetTimeout() + for id := range idlerSet { + if _, ok := s.statelessPeers[id]; ok { + continue + } + idlers.ids = append(idlers.ids, id) + idlers.caps = append(idlers.caps, s.rates.Capacity(id, msgCode, targetTTL)) + } + if len(idlers.ids) == 0 { + return idlers + } + sort.Sort(sort.Reverse(idlers)) + return idlers +} diff --git a/eth/protocols/snap/syncv2_test.go b/eth/protocols/snap/syncv2_test.go index d303d84c09..d3c1b61800 100644 --- a/eth/protocols/snap/syncv2_test.go +++ b/eth/protocols/snap/syncv2_test.go @@ -18,7 +18,10 @@ package snap import ( "bytes" + "errors" "fmt" + "math/big" + "slices" "sync" "sync/atomic" "testing" @@ -27,55 +30,60 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/core/types/bal" + "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" "github.com/ethereum/go-ethereum/trie/trienode" + "github.com/ethereum/go-ethereum/triedb" + "github.com/holiman/uint256" ) -// SyncerV2 (skeleton) only downloads the flat state (accounts, storage slots, -// bytecodes) and does not perform trie generation or state healing. These tests -// verify that, in a single uninterrupted sync cycle, the syncer fully downloads -// all the expected flat state from the source peer(s). 
- type ( - accountHandlerFuncV2 func(t *testPeerV2, requestId uint64, root common.Hash, origin common.Hash, limit common.Hash, cap int) error - storageHandlerFuncV2 func(t *testPeerV2, requestId uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, max int) error - codeHandlerFuncV2 func(t *testPeerV2, id uint64, hashes []common.Hash, max int) error + accountHandlerFuncV2 func(t *testPeerV2, requestId uint64, root common.Hash, origin common.Hash, limit common.Hash, cap int) error + storageHandlerFuncV2 func(t *testPeerV2, requestId uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, max int) error + codeHandlerFuncV2 func(t *testPeerV2, id uint64, hashes []common.Hash, max int) error + accessListHandlerFunc func(t *testPeerV2, id uint64, hashes []common.Hash, max int) error ) type testPeerV2 struct { id string test *testing.T - remote *SyncerV2 + remote *syncerV2 logger log.Logger accountTrie *trie.Trie accountValues []*kv storageTries map[common.Hash]*trie.Trie storageValues map[common.Hash][]*kv + accessLists map[common.Hash]rlp.RawValue // block hash -> RLP-encoded BAL - accountRequestHandler accountHandlerFuncV2 - storageRequestHandler storageHandlerFuncV2 - codeRequestHandler codeHandlerFuncV2 - term func() + accountRequestV2Handler accountHandlerFuncV2 + storageRequestV2Handler storageHandlerFuncV2 + codeRequestHandler codeHandlerFuncV2 + accessListRequestHandler accessListHandlerFunc + term func() // counters - nAccountRequests atomic.Int64 - nStorageRequests atomic.Int64 - nBytecodeRequests atomic.Int64 + nAccountRequests atomic.Int64 + nStorageRequests atomic.Int64 + nBytecodeRequests atomic.Int64 + nAccessListRequests atomic.Int64 } func newTestPeerV2(id string, t *testing.T, term func()) *testPeerV2 { - return &testPeerV2{ - id: id, - test: t, - logger: log.New("id", id), - accountRequestHandler: defaultAccountRequestHandlerV2, - storageRequestHandler: defaultStorageRequestHandlerV2, - codeRequestHandler: 
defaultCodeRequestHandlerV2, - term: term, + peer := &testPeerV2{ + id: id, + test: t, + logger: log.New("id", id), + accountRequestV2Handler: defaultAccountRequestHandlerV2, + storageRequestV2Handler: defaultStorageRequestHandlerV2, + codeRequestHandler: defaultCodeRequestHandlerV2, + accessListRequestHandler: defaultAccessListRequestHandler, + term: term, } + return peer } func (t *testPeerV2) setStorageTries(tries map[common.Hash]*trie.Trie) { @@ -89,16 +97,13 @@ func (t *testPeerV2) ID() string { return t.id } func (t *testPeerV2) Log() log.Logger { return t.logger } func (t *testPeerV2) Stats() string { - return fmt.Sprintf(`Account requests: %d -Storage requests: %d -Bytecode requests: %d -`, t.nAccountRequests.Load(), t.nStorageRequests.Load(), t.nBytecodeRequests.Load()) + return fmt.Sprintf(`Account requests: %d Storage requests: %d Bytecode requests: %d`, t.nAccountRequests.Load(), t.nStorageRequests.Load(), t.nBytecodeRequests.Load()) } func (t *testPeerV2) RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes int) error { t.logger.Trace("Fetching range of accounts", "reqid", id, "root", root, "origin", origin, "limit", limit, "bytes", common.StorageSize(bytes)) t.nAccountRequests.Add(1) - go t.accountRequestHandler(t, id, root, origin, limit, bytes) + go t.accountRequestV2Handler(t, id, root, origin, limit, bytes) return nil } @@ -109,7 +114,7 @@ func (t *testPeerV2) RequestStorageRanges(id uint64, root common.Hash, accounts } else { t.logger.Trace("Fetching ranges of small storage slots", "reqid", id, "root", root, "accounts", len(accounts), "first", accounts[0], "bytes", common.StorageSize(bytes)) } - go t.storageRequestHandler(t, id, root, accounts, origin, limit, bytes) + go t.storageRequestV2Handler(t, id, root, accounts, origin, limit, bytes) return nil } @@ -120,6 +125,18 @@ func (t *testPeerV2) RequestByteCodes(id uint64, hashes []common.Hash, bytes int return nil } +func (t *testPeerV2) RequestTrieNodes(id uint64, root 
common.Hash, count int, paths []TrieNodePathSet, bytes int) error { + // snap/2 never requests trie nodes. + return nil +} + +func (t *testPeerV2) RequestAccessLists(id uint64, hashes []common.Hash, bytes int) error { + t.nAccessListRequests.Add(1) + t.logger.Trace("Fetching set of BALs", "reqid", id, "hashes", len(hashes), "bytes", common.StorageSize(bytes)) + go t.accessListRequestHandler(t, id, hashes, bytes) + return nil +} + func createAccountRequestResponseV2(t *testPeerV2, root common.Hash, origin common.Hash, limit common.Hash, cap int) (keys []common.Hash, vals [][]byte, proofs [][]byte) { var size int if limit == (common.Hash{}) { @@ -287,8 +304,26 @@ func defaultCodeRequestHandlerV2(t *testPeerV2, id uint64, hashes []common.Hash, return nil } -// Misbehaving handlers. +// defaultAccessListRequestHandler serves BALs from the peer's accessLists map. +// If the peer has no BAL data, it returns empty (peer rejection). +func defaultAccessListRequestHandler(t *testPeerV2, id uint64, hashes []common.Hash, max int) error { + var results []rlp.RawValue + if t.accessLists != nil { + for _, h := range hashes { + if raw, ok := t.accessLists[h]; ok { + results = append(results, raw) + } + } + } + rawList, _ := rlp.EncodeToRawList(results) + if err := t.remote.OnAccessLists(t, id, rawList); err != nil { + t.test.Errorf("Remote side rejected our delivery: %v", err) + t.term() + } + return nil +} +// emptyRequestAccountRangeFnV2 is a rejects AccountRangeRequests func emptyRequestAccountRangeFnV2(t *testPeerV2, requestId uint64, root common.Hash, origin common.Hash, limit common.Hash, cap int) error { t.remote.OnAccounts(t, requestId, nil, nil, nil) return nil @@ -381,9 +416,74 @@ func noProofStorageRequestHandlerV2(t *testPeerV2, requestId uint64, root common return nil } -func setupSyncerV2(scheme string, peers ...*testPeerV2) *SyncerV2 { +// TestSyncBloatedProofV2 tests a scenario where we provide only _one_ value, but +// also ship the entire trie inside the proof. 
If the attack is successful, +// the remote side does not do any follow-up requests +func TestSyncBloatedProofV2(t *testing.T) { + t.Parallel() + testSyncBloatedProofV2(t, rawdb.HashScheme) + testSyncBloatedProofV2(t, rawdb.PathScheme) +} + +func testSyncBloatedProofV2(t *testing.T, scheme string) { + var ( + once sync.Once + cancel = make(chan struct{}) + term = func() { once.Do(func() { close(cancel) }) } + ) + nodeScheme, sourceAccountTrie, elems := makeAccountTrieNoStorage(100, scheme) + source := newTestPeerV2("source", t, term) + source.accountTrie = sourceAccountTrie.Copy() + source.accountValues = elems + + source.accountRequestV2Handler = func(t *testPeerV2, requestId uint64, root common.Hash, origin common.Hash, limit common.Hash, cap int) error { + var ( + keys []common.Hash + vals [][]byte + ) + // The values + for _, entry := range t.accountValues { + if bytes.Compare(entry.k, origin[:]) < 0 { + continue + } + if bytes.Compare(entry.k, limit[:]) > 0 { + continue + } + keys = append(keys, common.BytesToHash(entry.k)) + vals = append(vals, entry.v) + } + // The proofs + proof := trienode.NewProofSet() + if err := t.accountTrie.Prove(origin[:], proof); err != nil { + t.logger.Error("Could not prove origin", "origin", origin, "error", err) + } + // The bloat: add proof of every single element + for _, entry := range t.accountValues { + if err := t.accountTrie.Prove(entry.k, proof); err != nil { + t.logger.Error("Could not prove item", "error", err) + } + } + // And remove one item from the elements + if len(keys) > 2 { + keys = append(keys[:1], keys[2:]...) + vals = append(vals[:1], vals[2:]...) 
+ } + if err := t.remote.OnAccounts(t, requestId, keys, vals, proof.List()); err != nil { + t.logger.Info("remote error on delivery (as expected)", "error", err) + t.term() + // This is actually correct, signal to exit the test successfully + } + return nil + } + syncer := setupSyncerV2(nodeScheme, source) + if err := syncer.Sync(mkPivot(0, sourceAccountTrie.Hash()), cancel); err == nil { + t.Fatal("No error returned from incomplete/cancelled sync") + } +} + +func setupSyncerV2(scheme string, peers ...*testPeerV2) *syncerV2 { stateDb := rawdb.NewMemoryDatabase() - syncer := NewSyncerV2(stateDb, scheme) + syncer := newSyncerV2(stateDb, scheme) for _, peer := range peers { syncer.Register(peer) peer.remote = syncer @@ -391,53 +491,34 @@ func setupSyncerV2(scheme string, peers ...*testPeerV2) *SyncerV2 { return syncer } -// verifyFlatState checks that the database contains the snapshot entries for -// every expected account and storage slot, plus the bytecode for every account -// that has one. Trie node presence is intentionally not checked: SyncerV2 only -// downloads flat state. 
-func verifyFlatState(t *testing.T, db ethdb.KeyValueStore, accountValues []*kv, storageValues map[common.Hash][]*kv) { - t.Helper() - - for _, entry := range accountValues { - hash := common.BytesToHash(entry.k) - got := rawdb.ReadAccountSnapshot(db, hash) - if got == nil { - t.Fatalf("missing account snapshot for %x", hash) - } - var acc types.StateAccount - if err := rlp.DecodeBytes(entry.v, &acc); err != nil { - t.Fatalf("failed to decode source account %x: %v", hash, err) - } - want := types.SlimAccountRLP(acc) - if !bytes.Equal(got, want) { - t.Fatalf("account snapshot mismatch for %x:\n got %x\n want %x", hash, got, want) - } - if !bytes.Equal(acc.CodeHash, types.EmptyCodeHash.Bytes()) { - if !rawdb.HasCode(db, common.BytesToHash(acc.CodeHash)) { - t.Fatalf("missing code for hash %x (account %x)", acc.CodeHash, hash) - } - } +// mkPivot builds a minimal pivot header with the given block number and state +// root, suitable for test calls into syncerV2.Sync. +func mkPivot(num uint64, root common.Hash) *types.Header { + return &types.Header{ + Number: new(big.Int).SetUint64(num), + Root: root, + Difficulty: common.Big0, } - var accounts, slots int - for _, entry := range accountValues { - accounts++ - account := common.BytesToHash(entry.k) - for _, slot := range storageValues[account] { - slotHash := common.BytesToHash(slot.k) - got := rawdb.ReadStorageSnapshot(db, account, slotHash) - if got == nil { - t.Fatalf("missing storage snapshot for account %x slot %x", account, slotHash) - } - if !bytes.Equal(got, slot.v) { - t.Fatalf("storage snapshot mismatch for account %x slot %x:\n got %x\n want %x", account, slotHash, got, slot.v) - } - slots++ - } - } - t.Logf("flat state verified: accounts=%d slots=%d", accounts, slots) } -// TestSyncV2 tests a basic sync with one peer. +// makeAccessListHeaders builds a header map keyed by block hash where each +// header's BlockAccessListHash matches the BAL it points to. 
fetchAccessLists +// uses these headers to verify peer responses, so tests need to provide them +// alongside any BALs they expect to be accepted. +func makeAccessListHeaders(bals map[common.Hash]rlp.RawValue) map[common.Hash]*types.Header { + headers := make(map[common.Hash]*types.Header, len(bals)) + for h, raw := range bals { + var b bal.BlockAccessList + if err := rlp.DecodeBytes(raw, &b); err != nil { + continue + } + bh := b.Hash() + headers[h] = &types.Header{BlockAccessListHash: &bh} + } + return headers +} + +// TestSyncV2 tests a basic sync with one peer func TestSyncV2(t *testing.T) { t.Parallel() testSyncV2(t, rawdb.HashScheme) @@ -459,13 +540,49 @@ func testSyncV2(t *testing.T, scheme string) { return source } syncer := setupSyncerV2(nodeScheme, mkSource("source")) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(mkPivot(0, sourceAccountTrie.Hash()), cancel); err != nil { t.Fatalf("sync failed: %v", err) } - verifyFlatState(t, syncer.db, elems, nil) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) + verifyAdoptedSyncedState(scheme, syncer.db, sourceAccountTrie.Hash(), elems, t) } -// TestSyncTinyTriePanicV2 tests a basic sync with one peer and a tiny trie. +// verifyAdoptedSyncedState exercises the snap/2 completion contract end-to-end: +// after a real sync, opening a fresh triedb and calling AdoptSyncedState must +// (a) succeed and (b) leave flat-state reads serving immediately, with no +// background regeneration gating them. +func verifyAdoptedSyncedState(scheme string, db ethdb.KeyValueStore, root common.Hash, elems []*kv, t *testing.T) { + t.Helper() + if scheme != rawdb.PathScheme { + return + } + tdb := triedb.NewDatabase(rawdb.NewDatabase(db), newDbConfig(scheme)) + defer tdb.Close() + + if err := tdb.AdoptSyncedState(root); err != nil { + t.Fatalf("AdoptSyncedState failed: %v", err) + } + // Read one of the synced accounts via the public flat-state API. 
If this + // returned errNotCoveredYet we'd know AdoptSyncedState left a generator + // gating reads, exactly the bug we're trying to prevent. + sr, err := tdb.StateReader(root) + if err != nil { + t.Fatalf("StateReader: %v", err) + } + if len(elems) == 0 { + return + } + acc, err := sr.Account(common.BytesToHash(elems[0].k)) + if err != nil { + t.Fatalf("flat-state read failed after AdoptSyncedState: %v", err) + } + if acc == nil { + t.Fatal("flat-state read returned nil account; sync did not populate the snapshot namespace") + } +} + +// TestSyncTinyTriePanicV2 tests a basic sync with one peer, and a tiny trie. This caused a +// panic within the prover func TestSyncTinyTriePanicV2(t *testing.T) { t.Parallel() testSyncTinyTriePanicV2(t, rawdb.HashScheme) @@ -488,14 +605,14 @@ func testSyncTinyTriePanicV2(t *testing.T, scheme string) { } syncer := setupSyncerV2(nodeScheme, mkSource("source")) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(mkPivot(0, sourceAccountTrie.Hash()), cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) - verifyFlatState(t, syncer.db, elems, nil) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } -// TestMultiSyncV2 tests a basic sync with multiple peers. 
+// TestMultiSyncV2 tests a basic sync with multiple peers func TestMultiSyncV2(t *testing.T) { t.Parallel() testMultiSyncV2(t, rawdb.HashScheme) @@ -518,14 +635,14 @@ func testMultiSyncV2(t *testing.T, scheme string) { } syncer := setupSyncerV2(nodeScheme, mkSource("sourceA"), mkSource("sourceB")) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(mkPivot(0, sourceAccountTrie.Hash()), cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) - verifyFlatState(t, syncer.db, elems, nil) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } -// TestSyncWithStorageV2 tests basic sync using accounts + storage + code. +// TestSyncWithStorageV2 tests basic sync using accounts + storage + code func TestSyncWithStorageV2(t *testing.T) { t.Parallel() testSyncWithStorageV2(t, rawdb.HashScheme) @@ -550,14 +667,14 @@ func testSyncWithStorageV2(t *testing.T, scheme string) { } syncer := setupSyncerV2(scheme, mkSource("sourceA")) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(mkPivot(0, sourceAccountTrie.Hash()), cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) - verifyFlatState(t, syncer.db, elems, storageElems) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } -// TestMultiSyncManyUselessV2 keeps one good peer and several that return empty. 
+// TestMultiSyncManyUselessV2 contains one good peer, and many which doesn't return anything valuable at all func TestMultiSyncManyUselessV2(t *testing.T) { t.Parallel() testMultiSyncManyUselessV2(t, rawdb.HashScheme) @@ -570,7 +687,7 @@ func testMultiSyncManyUselessV2(t *testing.T, scheme string) { cancel = make(chan struct{}) term = func() { once.Do(func() { close(cancel) }) } ) - sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(scheme, 100, 3000, true, false, false) + sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(scheme, 100, 300, true, false, false) mkSource := func(name string, noAccount, noStorage bool) *testPeerV2 { source := newTestPeerV2(name, t, term) @@ -578,30 +695,29 @@ func testMultiSyncManyUselessV2(t *testing.T, scheme string) { source.accountValues = elems source.setStorageTries(storageTries) source.storageValues = storageElems - if noAccount { - source.accountRequestHandler = emptyRequestAccountRangeFnV2 + if !noAccount { + source.accountRequestV2Handler = emptyRequestAccountRangeFnV2 } - if noStorage { - source.storageRequestHandler = emptyStorageRequestHandlerV2 + if !noStorage { + source.storageRequestV2Handler = emptyStorageRequestHandlerV2 } return source } syncer := setupSyncerV2( scheme, - mkSource("full", false, false), - mkSource("noAccounts", true, false), - mkSource("noStorage", false, true), + mkSource("full", true, true), + mkSource("noAccounts", false, true), + mkSource("noStorage", true, false), ) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(mkPivot(0, sourceAccountTrie.Hash()), cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) - verifyFlatState(t, syncer.db, elems, storageElems) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } -// TestMultiSyncManyUselessWithLowTimeoutV2 is the same as above but with a very -// low timeout, exercising the 
timeout/reschedule paths. +// TestMultiSyncManyUselessWithLowTimeoutV2 contains one good peer, and many which doesn't return anything valuable at all func TestMultiSyncManyUselessWithLowTimeoutV2(t *testing.T) { t.Parallel() testMultiSyncManyUselessWithLowTimeoutV2(t, rawdb.HashScheme) @@ -614,7 +730,7 @@ func testMultiSyncManyUselessWithLowTimeoutV2(t *testing.T, scheme string) { cancel = make(chan struct{}) term = func() { once.Do(func() { close(cancel) }) } ) - sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(scheme, 100, 3000, true, false, false) + sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(scheme, 100, 300, true, false, false) mkSource := func(name string, noAccount, noStorage bool) *testPeerV2 { source := newTestPeerV2(name, t, term) @@ -623,10 +739,10 @@ func testMultiSyncManyUselessWithLowTimeoutV2(t *testing.T, scheme string) { source.setStorageTries(storageTries) source.storageValues = storageElems if !noAccount { - source.accountRequestHandler = emptyRequestAccountRangeFnV2 + source.accountRequestV2Handler = emptyRequestAccountRangeFnV2 } if !noStorage { - source.storageRequestHandler = emptyStorageRequestHandlerV2 + source.storageRequestV2Handler = emptyStorageRequestHandlerV2 } return source } @@ -639,15 +755,14 @@ func testMultiSyncManyUselessWithLowTimeoutV2(t *testing.T, scheme string) { syncer.rates.OverrideTTLLimit = time.Millisecond done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(mkPivot(0, sourceAccountTrie.Hash()), cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) - verifyFlatState(t, syncer.db, elems, storageElems) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } -// TestMultiSyncManyUnresponsiveV2 keeps one good peer and several that don't -// respond at all. 
+// TestMultiSyncManyUnresponsiveV2 contains one good peer, and many which doesn't respond at all func TestMultiSyncManyUnresponsiveV2(t *testing.T) { t.Parallel() testMultiSyncManyUnresponsiveV2(t, rawdb.HashScheme) @@ -660,7 +775,7 @@ func testMultiSyncManyUnresponsiveV2(t *testing.T, scheme string) { cancel = make(chan struct{}) term = func() { once.Do(func() { close(cancel) }) } ) - sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(scheme, 100, 3000, true, false, false) + sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(scheme, 100, 300, true, false, false) mkSource := func(name string, noAccount, noStorage bool) *testPeerV2 { source := newTestPeerV2(name, t, term) @@ -668,28 +783,28 @@ func testMultiSyncManyUnresponsiveV2(t *testing.T, scheme string) { source.accountValues = elems source.setStorageTries(storageTries) source.storageValues = storageElems - if noAccount { - source.accountRequestHandler = nonResponsiveRequestAccountRangeFnV2 + if !noAccount { + source.accountRequestV2Handler = nonResponsiveRequestAccountRangeFnV2 } - if noStorage { - source.storageRequestHandler = nonResponsiveStorageRequestHandlerV2 + if !noStorage { + source.storageRequestV2Handler = nonResponsiveStorageRequestHandlerV2 } return source } syncer := setupSyncerV2( scheme, - mkSource("full", false, false), - mkSource("noAccounts", true, false), - mkSource("noStorage", false, true), + mkSource("full", true, true), + mkSource("noAccounts", false, true), + mkSource("noStorage", true, false), ) syncer.rates.OverrideTTLLimit = time.Millisecond done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(mkPivot(0, sourceAccountTrie.Hash()), cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) - verifyFlatState(t, syncer.db, elems, storageElems) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } // TestSyncBoundaryAccountTrieV2 
tests sync against a few normal peers, but the @@ -720,15 +835,15 @@ func testSyncBoundaryAccountTrieV2(t *testing.T, scheme string) { mkSource("peer-b"), ) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(mkPivot(0, sourceAccountTrie.Hash()), cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) - verifyFlatState(t, syncer.db, elems, nil) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } -// TestSyncNoStorageAndOneCappedPeerV2 tests sync using accounts and no storage, -// where one peer is consistently returning very small results. +// TestSyncNoStorageAndOneCappedPeerV2 tests sync using accounts and no storage, where one peer is +// consistently returning very small results func TestSyncNoStorageAndOneCappedPeerV2(t *testing.T) { t.Parallel() testSyncNoStorageAndOneCappedPeerV2(t, rawdb.HashScheme) @@ -748,7 +863,7 @@ func testSyncNoStorageAndOneCappedPeerV2(t *testing.T, scheme string) { source.accountTrie = sourceAccountTrie.Copy() source.accountValues = elems if slow { - source.accountRequestHandler = starvingAccountRequestHandlerV2 + source.accountRequestV2Handler = starvingAccountRequestHandlerV2 } return source } @@ -761,14 +876,14 @@ func testSyncNoStorageAndOneCappedPeerV2(t *testing.T, scheme string) { mkSource("capped", true), ) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(mkPivot(0, sourceAccountTrie.Hash()), cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) - verifyFlatState(t, syncer.db, elems, nil) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } -// TestSyncNoStorageAndOneCodeCorruptPeerV2 has one peer that doesn't deliver +// TestSyncNoStorageAndOneCodeCorruptPeerV2 has one peer which doesn't deliver // code requests properly. 
func TestSyncNoStorageAndOneCodeCorruptPeerV2(t *testing.T) { t.Parallel() @@ -797,11 +912,11 @@ func testSyncNoStorageAndOneCodeCorruptPeerV2(t *testing.T, scheme string) { mkSource("corrupt", corruptCodeRequestHandlerV2), ) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(mkPivot(0, sourceAccountTrie.Hash()), cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) - verifyFlatState(t, syncer.db, elems, nil) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } func TestSyncNoStorageAndOneAccountCorruptPeerV2(t *testing.T) { @@ -822,24 +937,24 @@ func testSyncNoStorageAndOneAccountCorruptPeerV2(t *testing.T, scheme string) { source := newTestPeerV2(name, t, term) source.accountTrie = sourceAccountTrie.Copy() source.accountValues = elems - source.accountRequestHandler = accFn + source.accountRequestV2Handler = accFn return source } syncer := setupSyncerV2( nodeScheme, - mkSource("capped", starvingAccountRequestHandlerV2), + mkSource("capped", defaultAccountRequestHandlerV2), mkSource("corrupt", corruptAccountRequestHandlerV2), ) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(mkPivot(0, sourceAccountTrie.Hash()), cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) - verifyFlatState(t, syncer.db, elems, nil) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } -// TestSyncNoStorageAndOneCodeCappedPeerV2 has one peer that delivers code -// hashes one by one. 
+// TestSyncNoStorageAndOneCodeCappedPeerV2 has one peer which delivers code hashes +// one by one func TestSyncNoStorageAndOneCodeCappedPeerV2(t *testing.T) { t.Parallel() testSyncNoStorageAndOneCodeCappedPeerV2(t, rawdb.HashScheme) @@ -870,7 +985,7 @@ func testSyncNoStorageAndOneCodeCappedPeerV2(t *testing.T, scheme string) { }), ) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(mkPivot(0, sourceAccountTrie.Hash()), cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) @@ -878,7 +993,7 @@ func testSyncNoStorageAndOneCodeCappedPeerV2(t *testing.T, scheme string) { if threshold := 100; counter > threshold { t.Logf("Error, expected < %d invocations, got %d", threshold, counter) } - verifyFlatState(t, syncer.db, elems, nil) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } // TestSyncBoundaryStorageTrieV2 tests sync against a few normal peers, but the @@ -911,15 +1026,15 @@ func testSyncBoundaryStorageTrieV2(t *testing.T, scheme string) { mkSource("peer-b"), ) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(mkPivot(0, sourceAccountTrie.Hash()), cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) - verifyFlatState(t, syncer.db, elems, storageElems) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } -// TestSyncWithStorageAndOneCappedPeerV2 tests sync using accounts + storage, -// where one peer is consistently returning very small results. 
+// TestSyncWithStorageAndOneCappedPeerV2 tests sync using accounts + storage, where one peer is +// consistently returning very small results func TestSyncWithStorageAndOneCappedPeerV2(t *testing.T) { t.Parallel() testSyncWithStorageAndOneCappedPeerV2(t, rawdb.HashScheme) @@ -932,7 +1047,7 @@ func testSyncWithStorageAndOneCappedPeerV2(t *testing.T, scheme string) { cancel = make(chan struct{}) term = func() { once.Do(func() { close(cancel) }) } ) - sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(scheme, 300, 1000, false, false, false) + sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(scheme, 300, 100, false, false, false) mkSource := func(name string, slow bool) *testPeerV2 { source := newTestPeerV2(name, t, term) @@ -941,7 +1056,7 @@ func testSyncWithStorageAndOneCappedPeerV2(t *testing.T, scheme string) { source.setStorageTries(storageTries) source.storageValues = storageElems if slow { - source.storageRequestHandler = starvingStorageRequestHandlerV2 + source.storageRequestV2Handler = starvingStorageRequestHandlerV2 } return source } @@ -951,15 +1066,15 @@ func testSyncWithStorageAndOneCappedPeerV2(t *testing.T, scheme string) { mkSource("slow", true), ) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(mkPivot(0, sourceAccountTrie.Hash()), cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) - verifyFlatState(t, syncer.db, elems, storageElems) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } -// TestSyncWithStorageAndCorruptPeerV2 tests sync using accounts + storage, -// where one peer is sometimes sending bad proofs. 
+// TestSyncWithStorageAndCorruptPeerV2 tests sync using accounts + storage, where one peer is +// sometimes sending bad proofs func TestSyncWithStorageAndCorruptPeerV2(t *testing.T) { t.Parallel() testSyncWithStorageAndCorruptPeerV2(t, rawdb.HashScheme) @@ -972,7 +1087,7 @@ func testSyncWithStorageAndCorruptPeerV2(t *testing.T, scheme string) { cancel = make(chan struct{}) term = func() { once.Do(func() { close(cancel) }) } ) - sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(scheme, 100, 3000, true, false, false) + sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(scheme, 100, 300, true, false, false) mkSource := func(name string, handler storageHandlerFuncV2) *testPeerV2 { source := newTestPeerV2(name, t, term) @@ -980,7 +1095,7 @@ func testSyncWithStorageAndCorruptPeerV2(t *testing.T, scheme string) { source.accountValues = elems source.setStorageTries(storageTries) source.storageValues = storageElems - source.storageRequestHandler = handler + source.storageRequestV2Handler = handler return source } syncer := setupSyncerV2( @@ -991,11 +1106,11 @@ func testSyncWithStorageAndCorruptPeerV2(t *testing.T, scheme string) { mkSource("corrupt", corruptStorageRequestHandlerV2), ) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(mkPivot(0, sourceAccountTrie.Hash()), cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) - verifyFlatState(t, syncer.db, elems, storageElems) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } func TestSyncWithStorageAndNonProvingPeerV2(t *testing.T) { @@ -1010,7 +1125,7 @@ func testSyncWithStorageAndNonProvingPeerV2(t *testing.T, scheme string) { cancel = make(chan struct{}) term = func() { once.Do(func() { close(cancel) }) } ) - sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(scheme, 100, 3000, true, false, false) + sourceAccountTrie, elems, 
storageTries, storageElems := makeAccountTrieWithStorage(scheme, 100, 300, true, false, false) mkSource := func(name string, handler storageHandlerFuncV2) *testPeerV2 { source := newTestPeerV2(name, t, term) @@ -1018,7 +1133,7 @@ func testSyncWithStorageAndNonProvingPeerV2(t *testing.T, scheme string) { source.accountValues = elems source.setStorageTries(storageTries) source.storageValues = storageElems - source.storageRequestHandler = handler + source.storageRequestV2Handler = handler return source } syncer := setupSyncerV2( @@ -1029,16 +1144,17 @@ func testSyncWithStorageAndNonProvingPeerV2(t *testing.T, scheme string) { mkSource("corrupt", noProofStorageRequestHandlerV2), ) done := checkStall(t, term) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(mkPivot(0, sourceAccountTrie.Hash()), cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) - verifyFlatState(t, syncer.db, elems, storageElems) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } -// TestSyncWithStorageMisbehavingProveV2 tests basic sync using accounts + -// storage + code against a peer that insists on delivering full storage sets -// _and_ proofs. +// TestSyncWithStorageMisbehavingProveV2 tests basic sync using accounts + storage + code, against +// a peer who insists on delivering full storage sets _and_ proofs. This triggered +// an error, where the recipient erroneously clipped the boundary nodes, but +// did not mark the account for healing. 
func TestSyncWithStorageMisbehavingProveV2(t *testing.T) { t.Parallel() testSyncWithStorageMisbehavingProveV2(t, rawdb.HashScheme) @@ -1059,14 +1175,14 @@ func testSyncWithStorageMisbehavingProveV2(t *testing.T, scheme string) { source.accountValues = elems source.setStorageTries(storageTries) source.storageValues = storageElems - source.storageRequestHandler = proofHappyStorageRequestHandlerV2 + source.storageRequestV2Handler = proofHappyStorageRequestHandlerV2 return source } syncer := setupSyncerV2(nodeScheme, mkSource("sourceA")) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(mkPivot(0, sourceAccountTrie.Hash()), cancel); err != nil { t.Fatalf("sync failed: %v", err) } - verifyFlatState(t, syncer.db, elems, storageElems) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } // TestSyncWithUnevenStorageV2 tests sync where the storage trie is not even @@ -1091,74 +1207,1516 @@ func testSyncWithUnevenStorageV2(t *testing.T, scheme string) { source.accountValues = accounts source.setStorageTries(storageTries) source.storageValues = storageElems - source.storageRequestHandler = func(t *testPeerV2, reqId uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, max int) error { - return defaultStorageRequestHandlerV2(t, reqId, root, accounts, origin, limit, 128) + source.storageRequestV2Handler = func(t *testPeerV2, reqId uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, max int) error { + return defaultStorageRequestHandlerV2(t, reqId, root, accounts, origin, limit, 128) // retrieve storage in large mode } return source } syncer := setupSyncerV2(scheme, mkSource("source")) - if err := syncer.Sync(accountTrie.Hash(), cancel); err != nil { + if err := syncer.Sync(mkPivot(0, accountTrie.Hash()), cancel); err != nil { t.Fatalf("sync failed: %v", err) } - verifyFlatState(t, syncer.db, accounts, storageElems) + verifyTrie(scheme, syncer.db, accountTrie.Hash(), t) } -// 
TestSyncBloatedProofV2 tests a scenario where the peer ships only one value -// but inflates the proof with the entire trie. If the attack is successful the -// remote side does not do any follow-up requests. -func TestSyncBloatedProofV2(t *testing.T) { +// makeAccountTrieWithAddresses creates an account trie keyed by keccak(address), +// matching production behavior. Returns the trie, sorted entries, and the +// addresses used. This allows BAL-based tests to target specific addresses and +// have applyAccessList write to the same snapshot keys as the download. +func makeAccountTrieWithAddresses(n int, scheme string) (string, *trie.Trie, []*kv, []common.Address) { + var ( + db = triedb.NewDatabase(rawdb.NewMemoryDatabase(), newDbConfig(scheme)) + accTrie = trie.NewEmpty(db) + entries []*kv + addrs []common.Address + ) + for i := uint64(1); i <= uint64(n); i++ { + // Deterministic address from index + addr := common.BigToAddress(new(big.Int).SetUint64(i)) + addrs = append(addrs, addr) + + value, _ := rlp.EncodeToBytes(&types.StateAccount{ + Nonce: i, + Balance: uint256.NewInt(i), + Root: types.EmptyRootHash, + CodeHash: types.EmptyCodeHash[:], + }) + key := crypto.Keccak256(addr[:]) + elem := &kv{key, value} + accTrie.MustUpdate(elem.k, elem.v) + entries = append(entries, elem) + } + slices.SortFunc(entries, (*kv).cmp) + + root, nodes := accTrie.Commit(false) + db.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), triedb.NewStateSet()) + + accTrie, _ = trie.New(trie.StateTrieID(root), db) + return db.Scheme(), accTrie, entries, addrs +} + +// TestIsPivotReorged verifies the four conditions isPivotReorged covers: +// reorged out, non-advancing pivot, missing canonical, and the happy path +// where the previous pivot is still canonical and the new pivot advances. 
+func TestIsPivotReorged(t *testing.T) { t.Parallel() - testSyncBloatedProofV2(t, rawdb.HashScheme) - testSyncBloatedProofV2(t, rawdb.PathScheme) + + // Reorged: canonical hash at prev's height differs from prev. The + // previous pivot was reorged out by an alternate chain at the same + // (or higher) height. + t.Run("Reorged_DifferentHash", func(t *testing.T) { + db := rawdb.NewMemoryDatabase() + prev := mkPivot(100, common.HexToHash("0xaaaa")) + curr := mkPivot(105, common.HexToHash("0xcccc")) + canonical := mkPivot(100, common.HexToHash("0xbbbb")) + rawdb.WriteHeader(db, canonical) + rawdb.WriteCanonicalHash(db, canonical.Hash(), canonical.Number.Uint64()) + + if !isPivotReorged(db, prev, curr) { + t.Fatal("expected reorg detection when canonical hash differs") + } + }) + + // NonAdvancingPivot: new pivot is at or below the old one. There's + // nothing for catchUp to roll forward, regardless of canonical state. + t.Run("NonAdvancingPivot", func(t *testing.T) { + db := rawdb.NewMemoryDatabase() + prev := mkPivot(100, common.HexToHash("0xaaaa")) + curr := mkPivot(95, common.HexToHash("0xcccc")) + rawdb.WriteHeader(db, prev) + rawdb.WriteCanonicalHash(db, prev.Hash(), prev.Number.Uint64()) + + if !isPivotReorged(db, prev, curr) { + t.Fatal("expected reorg detection when new pivot is at or below the old one") + } + }) + + // MissingCanonical: canonical hash at prev's height is absent while + // curr advances past it. By the time Sync is called, headers up to + // curr should be indexed, so this implies broken chain state. + t.Run("MissingCanonical", func(t *testing.T) { + db := rawdb.NewMemoryDatabase() + prev := mkPivot(100, common.HexToHash("0xaaaa")) + curr := mkPivot(105, common.HexToHash("0xcccc")) + + if !isPivotReorged(db, prev, curr) { + t.Fatal("expected reorg detection when canonical hash is missing at prev's height") + } + }) + + // NotReorged_SameHash: prev is still canonical and curr advances past + // it. Catch-up is feasible. 
+ t.Run("NotReorged_SameHash", func(t *testing.T) { + db := rawdb.NewMemoryDatabase() + prev := mkPivot(100, common.HexToHash("0xaaaa")) + curr := mkPivot(105, common.HexToHash("0xcccc")) + rawdb.WriteHeader(db, prev) + rawdb.WriteCanonicalHash(db, prev.Hash(), prev.Number.Uint64()) + + if isPivotReorged(db, prev, curr) { + t.Fatal("should not detect reorg when prev is canonical and curr advances") + } + }) } -func testSyncBloatedProofV2(t *testing.T, scheme string) { +// TestSyncDetectsPivotReorged exercises the reorg-handling branch in Sync +// end-to-end. +// +// Setup: persisted progress points at an orphan pivot at block 100; the new +// canonical header at block 100 has a different hash. Sync is then called with +// a new pivot at the same height. +// +// If isPivotReorged works, loadSyncStatus restores previousPivot, the check +// flags it as reorged, resetSyncState clears previousPivot, catchUp is +// skipped, and the fresh download proceeds to completion. +// +// If detection doesn't fire, the pivot-move check would call catchUp with +// from = 101 and to = 100 — the inverted-range guard surfaces that as an +// error, failing the test. So Sync returning nil is the positive signal that +// reorg detection and the reset worked. +func TestSyncDetectsPivotReorged(t *testing.T) { + t.Parallel() + + nodeScheme, sourceAccountTrie, elems := makeAccountTrieNoStorage(100, rawdb.HashScheme) + root := sourceAccountTrie.Hash() + + db := rawdb.NewMemoryDatabase() + + // Persist progress against an orphan pivot — same height as the new + // canonical pivot we'll sync to, different hash. Populate a partial task + // and non-zero counter so the reset path has something to clean up. + orphanPivot := mkPivot(100, common.HexToHash("0xdead")) + seed := newSyncerV2(db, nodeScheme) + // previousPivot reflects where flat state matches and it is what + // saveSyncStatus persists. Set it to simulate a prior sync reaching + // orphanPivot. 
+ seed.previousPivot = orphanPivot + seed.pivot = orphanPivot + seed.accountSynced = 42 + seed.tasks = []*accountTaskV2{{ + Next: common.HexToHash("0x80"), + Last: common.MaxHash, + SubTasks: make(map[common.Hash][]*storageTaskV2), + stateCompleted: make(map[common.Hash]struct{}), + }} + seed.saveSyncStatus() + + // Pre-write orphan flat-state entries at hashes the test peer won't + // re-serve. After resetSyncState wipes the snapshot ranges, these + // should be gone. + orphanAccountHash := common.HexToHash("0xdeadbeef") + rawdb.WriteAccountSnapshot(db, orphanAccountHash, []byte{0xde, 0xad}) + orphanStorageAccount := common.HexToHash("0xfeedfacefeedfacefeedfacefeedfacefeedfacefeedfacefeedfacefeedface") + orphanStorageSlot := common.HexToHash("0xabcd") + rawdb.WriteStorageSnapshot(db, orphanStorageAccount, orphanStorageSlot, []byte{0xff, 0xff}) + + // Canonical header at block 100 is newPivot — different hash from the + // orphan pivot, which is what isPivotReorged will detect. + newPivot := mkPivot(100, root) + rawdb.WriteHeader(db, newPivot) + rawdb.WriteCanonicalHash(db, newPivot.Hash(), newPivot.Number.Uint64()) + + var ( + once sync.Once + cancel = make(chan struct{}) + term = func() { once.Do(func() { close(cancel) }) } + ) + syncer := newSyncerV2(db, nodeScheme) + src := newTestPeerV2("source", t, term) + src.accountTrie = sourceAccountTrie.Copy() + src.accountValues = elems + syncer.Register(src) + src.remote = syncer + + if err := syncer.Sync(newPivot, cancel); err != nil { + t.Fatalf("sync failed (reorg detection likely broken): %v", err) + } + // After successful completion, status should be marked Complete=true + // against the new (canonical) pivot. 
+ loader := newSyncerV2(db, nodeScheme) + loader.loadSyncStatus() + if !loader.complete { + t.Fatal("sync status should be marked Complete=true after successful completion") + } + if loader.previousPivot == nil || loader.previousPivot.Hash() != newPivot.Hash() { + t.Fatalf("expected persisted pivot to match new pivot") + } + if data := rawdb.ReadAccountSnapshot(db, orphanAccountHash); len(data) != 0 { + t.Errorf("orphan account snapshot should be wiped, got %x", data) + } + if val := rawdb.ReadStorageSnapshot(db, orphanStorageAccount, orphanStorageSlot); len(val) != 0 { + t.Errorf("orphan storage snapshot should be wiped, got %x", val) + } +} + +// TestInterruptedDownloadRecovery verifies that partially completed download +// state is persisted and resumed on restart. +func TestInterruptedDownloadRecovery(t *testing.T) { + t.Parallel() + testInterruptedDownloadRecovery(t, rawdb.HashScheme) + testInterruptedDownloadRecovery(t, rawdb.PathScheme) +} + +func testInterruptedDownloadRecovery(t *testing.T, scheme string) { + nodeScheme, sourceAccountTrie, elems := makeAccountTrieNoStorage(100, scheme) + root := sourceAccountTrie.Hash() + + // Cancel after exactly 2 account range responses, guaranteeing partial + // completion without any timing dependency. 
+ var ( + once1 sync.Once + cancel1 = make(chan struct{}) + term1 = func() { once1.Do(func() { close(cancel1) }) } + responses atomic.Int32 + ) + cancelAfterHandler := func(tp *testPeerV2, id uint64, root common.Hash, origin common.Hash, limit common.Hash, cap int) error { + if responses.Add(1) > 2 { + term1() + return nil + } + return defaultAccountRequestHandlerV2(tp, id, root, origin, limit, cap) + } + db := rawdb.NewMemoryDatabase() + syncer1 := newSyncerV2(db, nodeScheme) + src1 := newTestPeerV2("source1", t, term1) + src1.accountTrie = sourceAccountTrie.Copy() + src1.accountValues = elems + src1.accountRequestV2Handler = cancelAfterHandler + syncer1.Register(src1) + src1.remote = syncer1 + pivot := mkPivot(0, root) + syncer1.pivot = pivot + syncer1.previousPivot = pivot // Sync sets this before downloadState + syncer1.loadSyncStatus() + syncer1.downloadState(cancel1) + + // Save progress + for _, task := range syncer1.tasks { + syncer1.forwardAccountTask(task) + } + syncer1.cleanAccountTasks() + syncer1.saveSyncStatus() + + // Count how many accounts were downloaded in the first run. + // Due to the async nature of response processing, the cancel may race + // with delivery so 0 accounts may be written. 
+ firstRunCount := 0 + for _, entry := range elems { + if data := rawdb.ReadAccountSnapshot(db, common.BytesToHash(entry.k)); len(data) > 0 { + firstRunCount++ + } + } + if firstRunCount == len(elems) { + t.Fatal("first run should not have downloaded everything") + } + + // Second run: resume with same root, should complete the download + var ( + once2 sync.Once + cancel2 = make(chan struct{}) + term2 = func() { once2.Do(func() { close(cancel2) }) } + ) + syncer2 := newSyncerV2(db, nodeScheme) + src2 := newTestPeerV2("source2", t, term2) + src2.accountTrie = sourceAccountTrie.Copy() + src2.accountValues = elems + syncer2.Register(src2) + src2.remote = syncer2 + pivot2 := mkPivot(0, root) + syncer2.pivot = pivot2 + syncer2.previousPivot = pivot2 // Sync sets this before downloadState + syncer2.loadSyncStatus() + if err := syncer2.downloadState(cancel2); err != nil { + t.Fatalf("resumed download failed: %v", err) + } + + // Verify all accounts are now present + for _, entry := range elems { + if data := rawdb.ReadAccountSnapshot(db, common.BytesToHash(entry.k)); len(data) == 0 { + t.Errorf("missing account after resumed download: %x", entry.k) + } + } +} + +// TestSyncPersistsPivotDuringDownload verifies that after a fresh Sync is +// interrupted mid-download, the persisted previousPivot equals the current +// pivot (not nil). Without this, a follow-up Sync at a different pivot +// would not see that the partial flat state belongs to the old pivot, and +// would mix old-pivot accounts with new-pivot data. 
+func TestSyncPersistsPivotDuringDownload(t *testing.T) { + t.Parallel() + nodeScheme, sourceAccountTrie, elems := makeAccountTrieNoStorage(100, rawdb.HashScheme) + + var ( + once sync.Once + cancel = make(chan struct{}) + term = func() { once.Do(func() { close(cancel) }) } + responses atomic.Int32 + ) + db := rawdb.NewMemoryDatabase() + syncer := newSyncerV2(db, nodeScheme) + src := newTestPeerV2("source", t, term) + src.accountTrie = sourceAccountTrie.Copy() + src.accountValues = elems + src.accountRequestV2Handler = func(tp *testPeerV2, id uint64, root common.Hash, origin common.Hash, limit common.Hash, cap int) error { + if responses.Add(1) > 2 { + term() + return nil + } + return defaultAccountRequestHandlerV2(tp, id, root, origin, limit, cap) + } + syncer.Register(src) + src.remote = syncer + + pivot := mkPivot(0, sourceAccountTrie.Hash()) + // Sync should be interrupted by the cancel after a couple of responses. + _ = syncer.Sync(pivot, cancel) + + // Persisted previousPivot must equal the pivot, so a follow-up Sync at a + // different pivot can recognize the partial flat state belongs to this one. + loader := newSyncerV2(db, nodeScheme) + loader.loadSyncStatus() + if loader.previousPivot == nil { + t.Fatal("expected persisted previousPivot to be set after interrupted download, got nil") + } + if loader.previousPivot.Hash() != pivot.Hash() { + t.Errorf("persisted previousPivot mismatch: got %v, want %v", loader.previousPivot.Hash(), pivot.Hash()) + } +} + +// TestPivotMovement verifies the full pivot move flow: download with rootA, +// cancel+restart with rootB, catch-up applies BAL diffs, download resumes +// and completes against the new state. +func TestPivotMovement(t *testing.T) { + t.Parallel() + testPivotMovement(t, rawdb.HashScheme, 1) + testPivotMovement(t, rawdb.PathScheme, 1) +} + +// TestPivotMovementRepeated verifies that multiple pivot moves work correctly. 
+func TestPivotMovementRepeated(t *testing.T) { + t.Parallel() + testPivotMovement(t, rawdb.HashScheme, 2) + testPivotMovement(t, rawdb.PathScheme, 2) +} + +func testPivotMovement(t *testing.T, scheme string, pivotMoves int) { + // Use makeAccountTrieWithAddresses so trie keys are keccak(addr), + // matching what applyAccessList writes to the snapshot DB. + nodeScheme, sourceAccountTrie, elems, addrs := makeAccountTrieWithAddresses(100, scheme) + numA := uint64(100) + + // Target account 50 for BAL changes + targetAddr := addrs[49] + targetHash := crypto.Keccak256Hash(targetAddr[:]) + + type pivotMove struct { + blockNum uint64 + trie *trie.Trie + elems []*kv + root common.Hash + bals map[common.Hash]rlp.RawValue // header hash -> encoded BAL + balance *uint256.Int + } + + // Build each pivot move: update account 50's balance in both the trie + // and a BAL, write the header, and record everything. + db := rawdb.NewMemoryDatabase() + currentElems := elems + moves := make([]pivotMove, pivotMoves) + emptyHash := common.Hash{} + zero := uint64(0) + for m := 0; m < pivotMoves; m++ { + blockNum := numA + uint64(m) + 1 + balance := uint256.NewInt(uint64(1000 * (m + 1))) + + // Build updated trie with new balance for account 50 + trieDB := triedb.NewDatabase(rawdb.NewMemoryDatabase(), newDbConfig(scheme)) + newTrie := trie.NewEmpty(trieDB) + newElems := make([]*kv, len(currentElems)) + for i, entry := range currentElems { + if bytes.Equal(entry.k, targetHash[:]) { + val, _ := rlp.EncodeToBytes(&types.StateAccount{ + Nonce: 50, Balance: balance, + Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash[:], + }) + newElems[i] = &kv{entry.k, val} + } else { + newElems[i] = entry + } + newTrie.MustUpdate(newElems[i].k, newElems[i].v) + } + newRoot, nodes := newTrie.Commit(false) + trieDB.Update(newRoot, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), triedb.NewStateSet()) + resultTrie, _ := trie.New(trie.StateTrieID(newRoot), trieDB) + + // Build BAL matching the 
trie diff + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, targetAddr, balance) + var buf bytes.Buffer + if err := cb.EncodeRLP(&buf); err != nil { + t.Fatal(err) + } + + // Compute BAL hash, write header, store BAL keyed by header hash + var b bal.BlockAccessList + if err := rlp.DecodeBytes(buf.Bytes(), &b); err != nil { + t.Fatal(err) + } + balHash := b.Hash() + header := &types.Header{ + Number: new(big.Int).SetUint64(blockNum), Difficulty: common.Big0, + BaseFee: common.Big0, WithdrawalsHash: &emptyHash, + BlobGasUsed: &zero, ExcessBlobGas: &zero, + ParentBeaconRoot: &emptyHash, RequestsHash: &emptyHash, + BlockAccessListHash: &balHash, + } + rawdb.WriteHeader(db, header) + headerHash := header.Hash() + rawdb.WriteCanonicalHash(db, headerHash, blockNum) + moves[m] = pivotMove{ + blockNum: blockNum, + trie: resultTrie, + elems: newElems, + root: newRoot, + bals: map[common.Hash]rlp.RawValue{headerHash: buf.Bytes()}, + balance: balance, + } + currentElems = newElems + } + + // First run: download against rootA, cancel after 2 responses + rootA := sourceAccountTrie.Hash() + var ( + once1 sync.Once + cancel1 = make(chan struct{}) + term1 = func() { once1.Do(func() { close(cancel1) }) } + responses atomic.Int32 + ) + syncer1 := newSyncerV2(db, nodeScheme) + src1 := newTestPeerV2("source1", t, term1) + src1.accountTrie = sourceAccountTrie.Copy() + src1.accountValues = elems + src1.accountRequestV2Handler = func(tp *testPeerV2, id uint64, root common.Hash, origin common.Hash, limit common.Hash, cap int) error { + if responses.Add(1) > 2 { + term1() + return nil + } + return defaultAccountRequestHandlerV2(tp, id, root, origin, limit, cap) + } + syncer1.Register(src1) + src1.remote = syncer1 + syncer1.Sync(mkPivot(numA, rootA), cancel1) + + // Subsequent runs: each move triggers catch-up then resumes download + for i, move := range moves { + var ( + once sync.Once + cancel = make(chan struct{}) + term = func() { once.Do(func() { close(cancel) }) } + ) 
+ syncer := newSyncerV2(db, nodeScheme) + src := newTestPeerV2(fmt.Sprintf("source-%d", i+2), t, term) + src.accountTrie = move.trie.Copy() + src.accountValues = move.elems + src.accessLists = move.bals + syncer.Register(src) + src.remote = syncer + if err := syncer.Sync(mkPivot(move.blockNum, move.root), cancel); err != nil { + t.Fatalf("pivot move %d: sync failed: %v", i+1, err) + } + + // Verify account 50's balance was updated by catch-up + data := rawdb.ReadAccountSnapshot(db, targetHash) + if len(data) == 0 { + t.Fatalf("pivot move %d: account 50 not found after sync", i+1) + } + account, aErr := types.FullAccount(data) + if aErr != nil { + t.Fatalf("pivot move %d: failed to decode account: %v", i+1, aErr) + } + if account.Balance.Cmp(move.balance) != 0 { + t.Errorf("pivot move %d: balance wrong: got %v, want %v", i+1, account.Balance, move.balance) + } + } +} + +// TestCatchUpPersistsIncrementally verifies that catchUp updates and persists +// previousPivot after each successfully applied BAL. If a later block in the +// gap fails to apply, the persisted state reflects the last successful block, +// so a follow-up Sync can resume from there rather than reapplying everything. 
+func TestCatchUpPersistsIncrementally(t *testing.T) { + t.Parallel() + testCatchUpPersistsIncrementally(t, rawdb.HashScheme) + testCatchUpPersistsIncrementally(t, rawdb.PathScheme) +} + +func testCatchUpPersistsIncrementally(t *testing.T, scheme string) { + nodeScheme, sourceAccountTrie, elems, addrs := makeAccountTrieWithAddresses(100, scheme) + rootA := sourceAccountTrie.Hash() + numA := uint64(100) + + goodAddr := addrs[0] + corruptAddr := addrs[1] + + type balBlock struct { + header *types.Header + bal rlp.RawValue + } + + db := rawdb.NewMemoryDatabase() + emptyHash := common.Hash{} + zero := uint64(0) + + // Write the header and canonical hash for block A so the reorg-detection + // canonical-lookup in Sync passes (otherwise it'd treat A as reorged out + // and reset instead of running catchUp). + pivotAHeader := &types.Header{ + Number: new(big.Int).SetUint64(numA), Root: rootA, Difficulty: common.Big0, + BaseFee: common.Big0, WithdrawalsHash: &emptyHash, + BlobGasUsed: &zero, ExcessBlobGas: &zero, + ParentBeaconRoot: &emptyHash, RequestsHash: &emptyHash, + } + rawdb.WriteHeader(db, pivotAHeader) + rawdb.WriteCanonicalHash(db, pivotAHeader.Hash(), numA) + pivotA := pivotAHeader + + // Build three sequential BAL blocks (A+1, A+2, A+3). The first two touch + // goodAddr, the third touches corruptAddr so that block's apply fails + // once we've corrupted that account's snapshot. 
+ blocks := make([]balBlock, 3) + for i := 0; i < 3; i++ { + blockNum := numA + uint64(i) + 1 + target := goodAddr + if i == 2 { + target = corruptAddr + } + balance := uint256.NewInt(uint64(1000 * (i + 1))) + + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, target, balance) + var buf bytes.Buffer + if err := cb.EncodeRLP(&buf); err != nil { + t.Fatal(err) + } + var b bal.BlockAccessList + if err := rlp.DecodeBytes(buf.Bytes(), &b); err != nil { + t.Fatal(err) + } + balHash := b.Hash() + header := &types.Header{ + Number: new(big.Int).SetUint64(blockNum), Difficulty: common.Big0, + BaseFee: common.Big0, WithdrawalsHash: &emptyHash, + BlobGasUsed: &zero, ExcessBlobGas: &zero, + ParentBeaconRoot: &emptyHash, RequestsHash: &emptyHash, + BlockAccessListHash: &balHash, + } + rawdb.WriteHeader(db, header) + rawdb.WriteCanonicalHash(db, header.Hash(), blockNum) + blocks[i] = balBlock{header: header, bal: buf.Bytes()} + } + + // First sync: complete sync to A so persisted state has previousPivot=A, + // flat state covers all accounts. + { + var ( + once sync.Once + cancel = make(chan struct{}) + term = func() { once.Do(func() { close(cancel) }) } + ) + syncer := newSyncerV2(db, nodeScheme) + src := newTestPeerV2("seed", t, term) + src.accountTrie = sourceAccountTrie.Copy() + src.accountValues = elems + syncer.Register(src) + src.remote = syncer + if err := syncer.Sync(pivotA, cancel); err != nil { + t.Fatalf("seed sync failed: %v", err) + } + } + + // Corrupt the flat-state snapshot for corruptAddr so applyAccessList will + // fail when block A+3's BAL touches it. types.FullAccount rejects this + // payload as undecodable. + rawdb.WriteAccountSnapshot(db, crypto.Keccak256Hash(corruptAddr[:]), []byte{0xff, 0xff, 0xff, 0xff}) + + // Second sync: target is A+3. catchUp should apply A+1 and A+2 (good + // account), persist after each, then fail on A+3 (corrupt account). 
+ pivotB := blocks[2].header + balsByHash := map[common.Hash]rlp.RawValue{ + blocks[0].header.Hash(): blocks[0].bal, + blocks[1].header.Hash(): blocks[1].bal, + blocks[2].header.Hash(): blocks[2].bal, + } + + var ( + once sync.Once + cancel = make(chan struct{}) + term = func() { once.Do(func() { close(cancel) }) } + ) + syncer := newSyncerV2(db, nodeScheme) + src := newTestPeerV2("catchup", t, term) + src.accountTrie = sourceAccountTrie.Copy() + src.accountValues = elems + src.accessLists = balsByHash + syncer.Register(src) + src.remote = syncer + + if err := syncer.Sync(pivotB, cancel); err == nil { + t.Fatal("expected Sync to fail when applyAccessList hits corrupt flat state") + } + + // Persisted previousPivot should now reflect the last successfully applied + // block (A+2). Without per-iteration saves, it would still be at A. + loader := newSyncerV2(db, nodeScheme) + loader.loadSyncStatus() + if loader.previousPivot == nil { + t.Fatal("expected persisted previousPivot to be set after partial catchUp") + } + wantHash := blocks[1].header.Hash() + if loader.previousPivot.Hash() != wantHash { + t.Errorf("persisted previousPivot mismatch after partial catchUp: got %v, want %v (block A+2)", + loader.previousPivot.Hash(), wantHash) + } +} + +// TestSyncStatusMarkedCompleteAfterCompletion verifies that after a full sync +// completes, the persisted sync status has Complete=true. This lets a +// subsequent Sync call distinguish "already done" from "fresh node" and skip. 
+func TestSyncStatusMarkedCompleteAfterCompletion(t *testing.T) { + t.Parallel() + testSyncStatusMarkedCompleteAfterCompletion(t, rawdb.HashScheme) + testSyncStatusMarkedCompleteAfterCompletion(t, rawdb.PathScheme) +} + +func testSyncStatusMarkedCompleteAfterCompletion(t *testing.T, scheme string) { var ( once sync.Once cancel = make(chan struct{}) term = func() { once.Do(func() { close(cancel) }) } ) nodeScheme, sourceAccountTrie, elems := makeAccountTrieNoStorage(100, scheme) - source := newTestPeerV2("source", t, term) - source.accountTrie = sourceAccountTrie.Copy() - source.accountValues = elems - source.accountRequestHandler = func(t *testPeerV2, requestId uint64, root common.Hash, origin common.Hash, limit common.Hash, cap int) error { - var ( - keys []common.Hash - vals [][]byte - ) - for _, entry := range t.accountValues { - if bytes.Compare(entry.k, origin[:]) < 0 { - continue - } - if bytes.Compare(entry.k, limit[:]) > 0 { - continue - } - keys = append(keys, common.BytesToHash(entry.k)) - vals = append(vals, entry.v) + mkSource := func(name string) *testPeerV2 { + source := newTestPeerV2(name, t, term) + source.accountTrie = sourceAccountTrie.Copy() + source.accountValues = elems + return source + } + syncer := setupSyncerV2(nodeScheme, mkSource("source")) + pivot := mkPivot(0, sourceAccountTrie.Hash()) + if err := syncer.Sync(pivot, cancel); err != nil { + t.Fatalf("sync failed: %v", err) + } + + // After successful sync, persisted status should be present with + // Complete=true and the pivot we synced to. 
+ loader := newSyncerV2(syncer.db, nodeScheme) + loader.loadSyncStatus() + if !loader.complete { + t.Fatal("expected persisted status to have Complete=true after successful sync") + } + if loader.previousPivot == nil || loader.previousPivot.Hash() != pivot.Hash() { + t.Fatalf("expected persisted pivot to match synced pivot") + } +} + +// TestSyncSkipsIfAlreadyComplete verifies that a follow-up Sync call for the +// same pivot returns immediately without doing any work, since the persisted +// status indicates the sync is already complete. To prove the skip path actually +// fires, we deliberately wipe the flat state between the two calls. If it skips, +// Sync returns nil without touching flat state. If it doesn't skip, GenerateTrie +// would run against an empty snapshot and fail with a root mismatch. +func TestSyncSkipsIfAlreadyComplete(t *testing.T) { + t.Parallel() + + nodeScheme, sourceAccountTrie, elems := makeAccountTrieNoStorage(100, rawdb.HashScheme) + pivot := mkPivot(0, sourceAccountTrie.Hash()) + + var ( + once1 sync.Once + cancel1 = make(chan struct{}) + term1 = func() { once1.Do(func() { close(cancel1) }) } + ) + src1 := newTestPeerV2("source1", t, term1) + src1.accountTrie = sourceAccountTrie.Copy() + src1.accountValues = elems + syncer := setupSyncerV2(nodeScheme, src1) + if err := syncer.Sync(pivot, cancel1); err != nil { + t.Fatalf("first sync failed: %v", err) + } + + // Wipe the flat state. The persisted status (with Complete=true) stays. + if err := syncer.db.DeleteRange(rawdb.SnapshotAccountPrefix, []byte{rawdb.SnapshotAccountPrefix[0] + 1}); err != nil { + t.Fatalf("failed to wipe account snapshot: %v", err) + } + if err := syncer.db.DeleteRange(rawdb.SnapshotStoragePrefix, []byte{rawdb.SnapshotStoragePrefix[0] + 1}); err != nil { + t.Fatalf("failed to wipe storage snapshot: %v", err) + } + + // Second sync must take the skip path. If it didn't, the empty flat + // state would cause GenerateTrie to fail with a root mismatch. 
+ cancel2 := make(chan struct{}) + if err := syncer.Sync(pivot, cancel2); err != nil { + t.Fatalf("second sync should have skipped, got error: %v", err) + } +} + +// TestInterruptedRebuildRecovery verifies that if sync is interrupted after +// download completes but before trie rebuild finishes, the next Sync() call +// re-runs the download (which completes immediately) and rebuild. +func TestInterruptedRebuildRecovery(t *testing.T) { + t.Parallel() + + nodeScheme, sourceAccountTrie, elems := makeAccountTrieNoStorage(100, rawdb.HashScheme) + root := sourceAccountTrie.Hash() + + // First run: complete download, save status, simulate interruption + // before rebuild by calling downloadState() directly + var ( + once1 sync.Once + cancel1 = make(chan struct{}) + term1 = func() { once1.Do(func() { close(cancel1) }) } + ) + db := rawdb.NewMemoryDatabase() + syncer1 := newSyncerV2(db, nodeScheme) + src1 := newTestPeerV2("source1", t, term1) + src1.accountTrie = sourceAccountTrie.Copy() + src1.accountValues = elems + syncer1.Register(src1) + src1.remote = syncer1 + pivot := mkPivot(0, root) + syncer1.pivot = pivot + syncer1.previousPivot = pivot // Sync sets this before downloadState + syncer1.loadSyncStatus() + + if err := syncer1.downloadState(cancel1); err != nil { + t.Fatalf("download failed: %v", err) + } + // Save status (simulating what Sync's defer does) + for _, task := range syncer1.tasks { + syncer1.forwardAccountTask(task) + } + syncer1.cleanAccountTasks() + syncer1.saveSyncStatus() + + // Status should exist (rebuild hasn't run yet) + if rawdb.ReadSnapshotSyncStatus(db) == nil { + t.Fatal("sync status should exist after download") + } + // Second run: full Sync should detect tasks are done, run rebuild + var ( + once2 sync.Once + cancel2 = make(chan struct{}) + term2 = func() { once2.Do(func() { close(cancel2) }) } + ) + syncer2 := newSyncerV2(db, nodeScheme) + src2 := newTestPeerV2("source2", t, term2) + src2.accountTrie = sourceAccountTrie.Copy() + 
src2.accountValues = elems + syncer2.Register(src2) + src2.remote = syncer2 + + if err := syncer2.Sync(mkPivot(0, root), cancel2); err != nil { + t.Fatalf("resumed sync failed: %v", err) + } + // After rebuild completes, status should be marked Complete=true. + loader := newSyncerV2(db, nodeScheme) + loader.loadSyncStatus() + if !loader.complete { + t.Fatal("sync status should be marked Complete=true after rebuild completes") + } +} + +// TestFetchAccessListsMultiplePeers verifies that fetch distributes work +// across multiple idle peers. +func TestFetchAccessListsMultiplePeers(t *testing.T) { + t.Parallel() + var ( + once sync.Once + cancel = make(chan struct{}) + term = func() { once.Do(func() { close(cancel) }) } + ) + + // Create enough BALs to potentially split across peers + var hashes []common.Hash + bals := make(map[common.Hash]rlp.RawValue) + for i := 0; i < 10; i++ { + h := common.HexToHash(fmt.Sprintf("0x%02x", i+1)) + hashes = append(hashes, h) + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, common.HexToAddress("0xaa"), uint256.NewInt(uint64(i))) + var buf bytes.Buffer + if err := cb.EncodeRLP(&buf); err != nil { + t.Fatal(err) } - proof := trienode.NewProofSet() - if err := t.accountTrie.Prove(origin[:], proof); err != nil { - t.logger.Error("Could not prove origin", "origin", origin, "error", err) + bals[h] = buf.Bytes() + } + mkSource := func(name string) *testPeerV2 { + source := newTestPeerV2(name, t, term) + source.accessLists = bals + return source + } + syncer := setupSyncerV2(rawdb.HashScheme, mkSource("peer-a"), mkSource("peer-b"), mkSource("peer-c")) + results, err := syncer.fetchAccessLists(hashes, makeAccessListHeaders(bals), cancel) + if err != nil { + t.Fatalf("fetchAccessLists failed: %v", err) + } + if len(results) != len(hashes) { + t.Fatalf("result count mismatch: got %d, want %d", len(results), len(hashes)) + } + // Verify results match expected content in request order + for i, h := range hashes { + if 
!bytes.Equal(results[i], bals[h]) { + t.Errorf("result %d content mismatch for hash %v", i, h) } - for _, entry := range t.accountValues { - if err := t.accountTrie.Prove(entry.k, proof); err != nil { - t.logger.Error("Could not prove item", "error", err) - } + } +} + +// TestFetchAccessListsPeerTimeout verifies that timed-out requests are retried +// with a different peer. +func TestFetchAccessListsPeerTimeout(t *testing.T) { + t.Parallel() + var ( + once sync.Once + cancel = make(chan struct{}) + term = func() { once.Do(func() { close(cancel) }) } + ) + hashes := []common.Hash{common.HexToHash("0x01")} + bals := make(map[common.Hash]rlp.RawValue) + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, common.HexToAddress("0xaa"), uint256.NewInt(42)) + var buf bytes.Buffer + if err := cb.EncodeRLP(&buf); err != nil { + t.Fatal(err) + } + bals[hashes[0]] = buf.Bytes() + + // First peer never responds + nonResponsive := newTestPeerV2("non-responsive", t, term) + nonResponsive.accessListRequestHandler = func(t *testPeerV2, id uint64, hashes []common.Hash, max int) error { + // Don't respond — let it time out + return nil + } + + // Second peer serves correctly + good := newTestPeerV2("good", t, term) + good.accessLists = bals + syncer := setupSyncerV2(rawdb.HashScheme, nonResponsive, good) + syncer.rates.OverrideTTLLimit = time.Millisecond // Fast timeout + results, err := syncer.fetchAccessLists(hashes, makeAccessListHeaders(bals), cancel) + if err != nil { + t.Fatalf("fetchAccessLists failed: %v", err) + } + if len(results) != 1 { + t.Fatalf("result count mismatch: got %d, want 1", len(results)) + } +} + +// TestFetchAccessListsPeerRejection verifies that peers returning empty +// responses are marked stateless and work is retried with another peer. 
+func TestFetchAccessListsPeerRejection(t *testing.T) { + t.Parallel() + var ( + once sync.Once + cancel = make(chan struct{}) + term = func() { once.Do(func() { close(cancel) }) } + ) + hashes := []common.Hash{common.HexToHash("0x01")} + bals := make(map[common.Hash]rlp.RawValue) + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, common.HexToAddress("0xaa"), uint256.NewInt(42)) + var buf bytes.Buffer + if err := cb.EncodeRLP(&buf); err != nil { + t.Fatal(err) + } + bals[hashes[0]] = buf.Bytes() + + // First peer rejects (has no BAL data, returns empty) + // accessLists is nil, so defaultAccessListRequestHandler returns empty + rejector := newTestPeerV2("rejector", t, term) + + // Second peer serves correctly + good := newTestPeerV2("good", t, term) + good.accessLists = bals + syncer := setupSyncerV2(rawdb.HashScheme, rejector, good) + results, err := syncer.fetchAccessLists(hashes, makeAccessListHeaders(bals), cancel) + if err != nil { + t.Fatalf("fetchAccessLists failed: %v", err) + } + if len(results) != 1 { + t.Fatalf("result count mismatch: got %d, want 1", len(results)) + } +} + +// TestFetchAccessListsCancel verifies that fetchAccessLists returns promptly +// when cancelled. 
+func TestFetchAccessListsCancel(t *testing.T) { + t.Parallel() + cancel := make(chan struct{}) + + // Peer that never responds + nonResponsive := newTestPeerV2("non-responsive", t, func() {}) + nonResponsive.accessListRequestHandler = func(t *testPeerV2, id uint64, hashes []common.Hash, max int) error { + return nil // never deliver + } + syncer := setupSyncerV2(rawdb.HashScheme, nonResponsive) + hashes := []common.Hash{common.HexToHash("0x01")} + + // Cancel after a short delay + go func() { + time.Sleep(50 * time.Millisecond) + close(cancel) + }() + _, err := syncer.fetchAccessLists(hashes, nil, cancel) + if err != ErrCancelled { + t.Fatalf("expected ErrCancelled, got %v", err) + } +} + +// TestFetchAccessListsPeerDrop verifies that dropping a peer mid-request +// causes the request to be retried with a different peer. +func TestFetchAccessListsPeerDrop(t *testing.T) { + t.Parallel() + var ( + once sync.Once + cancel = make(chan struct{}) + term = func() { once.Do(func() { close(cancel) }) } + ) + hashes := []common.Hash{common.HexToHash("0x01")} + bals := make(map[common.Hash]rlp.RawValue) + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, common.HexToAddress("0xaa"), uint256.NewInt(42)) + var buf bytes.Buffer + if err := cb.EncodeRLP(&buf); err != nil { + t.Fatal(err) + } + bals[hashes[0]] = buf.Bytes() + + // First peer will be dropped mid-request + dropped := newTestPeerV2("dropped", t, term) + dropped.accessListRequestHandler = func(tp *testPeerV2, id uint64, hashes []common.Hash, max int) error { + // Simulate peer dropping by unregistering + tp.remote.Unregister(tp.id) + return nil + } + + // Second peer serves correctly + good := newTestPeerV2("good", t, term) + good.accessLists = bals + syncer := setupSyncerV2(rawdb.HashScheme, dropped, good) + results, err := syncer.fetchAccessLists(hashes, makeAccessListHeaders(bals), cancel) + if err != nil { + t.Fatalf("fetchAccessLists failed: %v", err) + } + if len(results) != 1 { + 
t.Fatalf("result count mismatch: got %d, want 1", len(results)) + } +} + +// TestFetchAccessListsShortResponse verifies that when a peer returns fewer +// BALs than requested (a short/partial response), the un-served hashes are +// retried and eventually all results are collected. +func TestFetchAccessListsShortResponse(t *testing.T) { + t.Parallel() + var ( + once sync.Once + cancel = make(chan struct{}) + term = func() { once.Do(func() { close(cancel) }) } + ) + + // Request 4 hashes but the peer only returns the first 2. + hashes := []common.Hash{ + common.HexToHash("0x01"), + common.HexToHash("0x02"), + common.HexToHash("0x03"), + common.HexToHash("0x04"), + } + allBALs := make(map[common.Hash]rlp.RawValue) + for _, h := range hashes { + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, common.HexToAddress("0xaa"), uint256.NewInt(uint64(h[31]))) + var buf bytes.Buffer + if err := cb.EncodeRLP(&buf); err != nil { + t.Fatal(err) } - if len(keys) > 2 { - keys = append(keys[:1], keys[2:]...) - vals = append(vals[:1], vals[2:]...) + allBALs[h] = buf.Bytes() + } + + // shortPeer returns only the first 2 BALs regardless of how many are + // requested. This simulates a peer that truncates its response (e.g., + // hitting the 2 MiB response soft limit). + shortPeer := newTestPeerV2("short", t, term) + shortPeer.accessListRequestHandler = func(tp *testPeerV2, id uint64, reqHashes []common.Hash, max int) error { + // Return only the first 2 of however many were requested. 
+ limit := 2 + if len(reqHashes) < limit { + limit = len(reqHashes) } - if err := t.remote.OnAccounts(t, requestId, keys, vals, proof.List()); err != nil { - t.logger.Info("remote error on delivery (as expected)", "error", err) - t.term() + var results []rlp.RawValue + for i := 0; i < limit; i++ { + results = append(results, allBALs[reqHashes[i]]) + } + rawList, _ := rlp.EncodeToRawList(results) + if err := tp.remote.OnAccessLists(tp, id, rawList); err != nil { + tp.test.Errorf("delivery rejected: %v", err) + tp.term() } return nil } - syncer := setupSyncerV2(nodeScheme, source) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err == nil { - t.Fatal("No error returned from incomplete/cancelled sync") + syncer := setupSyncerV2(rawdb.HashScheme, shortPeer) + + // Pre-seed the rate tracker so the peer's capacity for AccessListsMsg is + // high enough to get all 4 hashes assigned in a single request. Without + // this, the default capacity is 1, so the peer would only get 1 hash per + // round and the short-response scenario never triggers. + syncer.rates.Update(shortPeer.id, AccessListsMsg, time.Millisecond, 100) + + // If the bug exists, this will hang. + done := make(chan struct{}) + var ( + results []rlp.RawValue + fetchErr error + ) + go func() { + results, fetchErr = syncer.fetchAccessLists(hashes, makeAccessListHeaders(allBALs), cancel) + close(done) + }() + + select { + case <-done: + // fetchAccessLists returned + case <-time.After(5 * time.Second): + t.Fatal("fetchAccessLists has hung. 
This means unserved hashes were never re-added to pending.") + } + if fetchErr != nil { + t.Fatalf("fetchAccessLists failed: %v", fetchErr) + } + if len(results) != len(hashes) { + t.Fatalf("result count mismatch: got %d, want %d", len(results), len(hashes)) + } + + // Verify all results are non-nil and in correct order + for i, h := range hashes { + if results[i] == nil { + t.Errorf("result %d (hash %v) is nil", i, h) + } } } + +// TestFetchAccessListsEmptyPlaceholder verifies that when a peer returns +// rlp.EmptyString placeholders for BALs it doesn't have, those placeholders +// are not silently accepted as valid results. +func TestFetchAccessListsEmptyPlaceholder(t *testing.T) { + t.Parallel() + var ( + once sync.Once + cancel = make(chan struct{}) + term = func() { once.Do(func() { close(cancel) }) } + ) + hashes := []common.Hash{ + common.HexToHash("0x01"), + common.HexToHash("0x02"), + common.HexToHash("0x03"), + } + + // Build BALs for all 3 hashes + allBALs := make(map[common.Hash]rlp.RawValue) + for _, h := range hashes { + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, common.HexToAddress("0xaa"), uint256.NewInt(uint64(h[31]))) + var buf bytes.Buffer + if err := cb.EncodeRLP(&buf); err != nil { + t.Fatal(err) + } + allBALs[h] = buf.Bytes() + } + + // partialPeer has BALs for hashes 0 and 2. The server + // handler returns rlp.EmptyString for the missing BAL. 
+ partialPeer := newTestPeerV2("partial", t, term) + partialPeer.accessListRequestHandler = func(tp *testPeerV2, id uint64, reqHashes []common.Hash, max int) error { + var results []rlp.RawValue + for _, h := range reqHashes { + if raw, ok := allBALs[h]; ok && h != hashes[1] { + results = append(results, raw) + } else { + results = append(results, rlp.EmptyString) + } + } + rawList, _ := rlp.EncodeToRawList(results) + if err := tp.remote.OnAccessLists(tp, id, rawList); err != nil { + tp.test.Errorf("delivery rejected: %v", err) + tp.term() + } + return nil + } + + // fullPeer has all BALs + fullPeer := newTestPeerV2("full", t, term) + fullPeer.accessLists = allBALs + syncer := setupSyncerV2(rawdb.HashScheme, partialPeer, fullPeer) + + // Pre-seed capacity so partialPeer gets all 3 hashes + syncer.rates.Update(partialPeer.id, AccessListsMsg, time.Millisecond, 100) + done := make(chan struct{}) + var ( + results []rlp.RawValue + fetchErr error + ) + go func() { + results, fetchErr = syncer.fetchAccessLists(hashes, makeAccessListHeaders(allBALs), cancel) + close(done) + }() + + // Wait for fetch to complete + select { + case <-done: + case <-time.After(5 * time.Second): + t.Fatal("fetchAccessLists hung") + } + if fetchErr != nil { + t.Fatalf("fetchAccessLists failed: %v", fetchErr) + } + + // Verify the results are valid. + for i, raw := range results { + var accessList bal.BlockAccessList + if err := rlp.DecodeBytes(raw, &accessList); err != nil { + t.Errorf("result %d (hash %v) is not a valid BAL: %v (got raw bytes %x)", + i, hashes[i], err, raw) + } + } +} + +// TestFetchAccessListsRejectsBadBAL verifies that when a peer delivers a BAL +// whose hash doesn't match the canonical block header, fetchAccessLists marks +// the peer stateless, drops the response, and surfaces the exhaustion error +// once no other peers can serve the work. 
+func TestFetchAccessListsRejectsBadBAL(t *testing.T) { + t.Parallel() + var ( + once sync.Once + cancel = make(chan struct{}) + term = func() { once.Do(func() { close(cancel) }) } + ) + hash := common.HexToHash("0x01") + hashes := []common.Hash{hash} + + // Build a BAL we'll actually serve. + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, common.HexToAddress("0xaa"), uint256.NewInt(42)) + var buf bytes.Buffer + if err := cb.EncodeRLP(&buf); err != nil { + t.Fatal(err) + } + served := buf.Bytes() + + // Build a header whose BlockAccessListHash points at something else, so + // the served BAL fails verification. + mismatch := common.HexToHash("0xdeadbeef") + headers := map[common.Hash]*types.Header{ + hash: {BlockAccessListHash: &mismatch}, + } + + peer := newTestPeerV2("liar", t, term) + peer.accessLists = map[common.Hash]rlp.RawValue{hash: served} + syncer := setupSyncerV2(rawdb.HashScheme, peer) + + results, err := syncer.fetchAccessLists(hashes, headers, cancel) + if !errors.Is(err, errAccessListPeersExhausted) { + t.Fatalf("expected errAccessListPeersExhausted, got %v", err) + } + if results != nil { + t.Errorf("expected nil results on error, got %v", results) + } + syncer.lock.RLock() + _, stateless := syncer.statelessPeers[peer.id] + syncer.lock.RUnlock() + if !stateless { + t.Error("expected liar peer to be marked stateless after bad BAL") + } +} + +// TestCatchUpRetriesOnBadBAL verifies that when one peer serves a BAL that +// fails verification but another serves a valid one, fetchAccessLists routes +// the work around the bad peer and returns the verified BAL. 
+func TestCatchUpRetriesOnBadBAL(t *testing.T) { + t.Parallel() + var ( + once sync.Once + cancel = make(chan struct{}) + term = func() { once.Do(func() { close(cancel) }) } + ) + hash := common.HexToHash("0x01") + hashes := []common.Hash{hash} + + cb := bal.NewConstructionBlockAccessList() + cb.BalanceChange(0, common.HexToAddress("0xaa"), uint256.NewInt(42)) + var buf bytes.Buffer + if err := cb.EncodeRLP(&buf); err != nil { + t.Fatal(err) + } + good := buf.Bytes() + + // A second BAL with different content used as the "bad" payload. It + // decodes cleanly but its hash will not match the header. + other := bal.NewConstructionBlockAccessList() + other.BalanceChange(0, common.HexToAddress("0xbb"), uint256.NewInt(99)) + var otherBuf bytes.Buffer + if err := other.EncodeRLP(&otherBuf); err != nil { + t.Fatal(err) + } + bad := otherBuf.Bytes() + + headers := makeAccessListHeaders(map[common.Hash]rlp.RawValue{hash: good}) + + liar := newTestPeerV2("liar", t, term) + liar.accessLists = map[common.Hash]rlp.RawValue{hash: bad} + honest := newTestPeerV2("honest", t, term) + honest.accessLists = map[common.Hash]rlp.RawValue{hash: good} + + syncer := setupSyncerV2(rawdb.HashScheme, liar, honest) + // Bias the capacity sort so the liar is asked first, exercising the + // reject-and-retry path rather than getting lucky on assignment order. 
+ syncer.rates.Update(liar.id, AccessListsMsg, time.Millisecond, 1000) + + results, err := syncer.fetchAccessLists(hashes, headers, cancel) + if err != nil { + t.Fatalf("fetchAccessLists failed: %v", err) + } + if !bytes.Equal(results[0], good) { + t.Errorf("expected the honest BAL, got %x", results[0]) + } + syncer.lock.RLock() + _, liarStateless := syncer.statelessPeers[liar.id] + _, honestStateless := syncer.statelessPeers[honest.id] + syncer.lock.RUnlock() + if !liarStateless { + t.Error("expected liar to be marked stateless") + } + if honestStateless { + t.Error("expected honest peer to remain in good standing") + } +} + +// makeStorageTrieFromSlots builds a storage trie for owner from raw slot +// key->value pairs, using the exact on-disk encoding the flat snapshot and the +// trie rebuild expect: each leaf is keyed by keccak256(slotKey) and its value is +// rlp(TrimLeftZeroes(value)). Zero-valued slots are skipped (an unset slot has +// no leaf). It returns the storage root, the dirty node set, and the sorted +// snapshot leaves (which a test peer serves verbatim). +func makeStorageTrieFromSlots(db *triedb.Database, owner common.Hash, slots map[common.Hash]common.Hash) (common.Hash, *trienode.NodeSet, []*kv) { + st, _ := trie.New(trie.StorageTrieID(types.EmptyRootHash, owner, types.EmptyRootHash), db) + var entries []*kv + for rawKey, value := range slots { + if value == (common.Hash{}) { + continue // unset slot: no leaf + } + slotHash := crypto.Keccak256Hash(rawKey[:]) + enc, _ := rlp.EncodeToBytes(common.TrimLeftZeroes(value[:])) + st.MustUpdate(slotHash[:], enc) + entries = append(entries, &kv{slotHash[:], enc}) + } + slices.SortFunc(entries, (*kv).cmp) + root, nodes := st.Commit(false) + return root, nodes, entries +} + +// makeStateWithStorageContract builds an account trie holding the given +// storage-less accounts plus a single contract account whose storage trie is +// built from slots. 
Everything is committed into a fresh triedb so the tries +// can be served by a test peer. It returns the recreated account trie, the +// sorted account leaves, the recreated contract storage trie, the sorted +// storage leaves, and the resulting state root. +func makeStateWithStorageContract(scheme string, plain []*kv, contractAddr common.Address, contract types.StateAccount, slots map[common.Hash]common.Hash) (*trie.Trie, []*kv, *trie.Trie, []*kv, common.Hash) { + db := triedb.NewDatabase(rawdb.NewMemoryDatabase(), newDbConfig(scheme)) + accTrie := trie.NewEmpty(db) + merged := trienode.NewMergedNodeSet() + + // Contract storage trie. + contractHash := crypto.Keccak256Hash(contractAddr[:]) + stRoot, stNodes, stEntries := makeStorageTrieFromSlots(db, contractHash, slots) + if stNodes != nil { + merged.Merge(stNodes) + } + + // Contract account leaf carries the (live) storage root. + contract.Root = stRoot + cval, _ := rlp.EncodeToBytes(&contract) + accTrie.MustUpdate(contractHash[:], cval) + accEntries := []*kv{{contractHash[:], cval}} + + // Storage-less filler accounts. + for _, e := range plain { + accTrie.MustUpdate(e.k, e.v) + accEntries = append(accEntries, &kv{e.k, e.v}) + } + slices.SortFunc(accEntries, (*kv).cmp) + + // Commit account + storage nodes together, then re-open for serving. + root, set := accTrie.Commit(true) + merged.Merge(set) + db.Update(root, types.EmptyRootHash, 0, merged, triedb.NewStateSet()) + + accTrie, _ = trie.New(trie.StateTrieID(root), db) + stTrie, _ := trie.New(trie.StorageTrieID(root, contractHash, stRoot), db) + return accTrie, accEntries, stTrie, stEntries, root +} + +// TestCatchUpAppliesStorageBALs exercises the snap/2 catch-up path with a BAL +// that mutates storage slots (not just balances): a non-zero write to a fresh +// slot, an overwrite of an existing slot, a write of zero (deletion), and a +// multi-tx write where the post-block value wins. 
+// +// It fully syncs pivot A (flat-state download + trie rebuild), then moves the +// pivot to A+1. The move triggers catchUp, which fetches the A+1 BAL, applies +// the storage diffs to the flat state, and rebuilds the trie. The rebuild +// verifies the recomputed root against the pivot's expected post-catch-up root, +// so a successful Sync proves the storage mutations were applied in the exact +// encoding the trie rebuild consumes. verifyTrie re-walks the result as an +// independent confirmation. +func TestCatchUpAppliesStorageBALs(t *testing.T) { + t.Parallel() + testCatchUpAppliesStorageBALs(t, rawdb.HashScheme) + testCatchUpAppliesStorageBALs(t, rawdb.PathScheme) +} + +func testCatchUpAppliesStorageBALs(t *testing.T, scheme string) { + // The contract whose storage the A+1 BAL mutates. + contractAddr := common.HexToAddress("0x00000000000000000000000000000000c0ffee01") + contractHash := crypto.Keccak256Hash(contractAddr[:]) + + // Raw storage slot keys. + var ( + slotKeep = common.HexToHash("0x01") // untouched by the BAL + slotOver = common.HexToHash("0x02") // overwritten with a new non-zero value + slotZero = common.HexToHash("0x03") // written to zero (deletion) + slotNew = common.HexToHash("0x04") // unset in A, written non-zero in A+1 + slotMultiTx = common.HexToHash("0x05") // written several times within the block + ) + // Slot values. Multi-byte values force RLP length prefixes, so the encoding + // differs sharply from the raw 32-byte form and a format mismatch surfaces. + var ( + vKeep = common.HexToHash("0x1111") + vOver0 = common.HexToHash("0x2222") + vOver1 = common.HexToHash("0x22220000aaaa") + vZero0 = common.HexToHash("0x3333") + vNew = common.HexToHash("0x4444") + vMulti0 = common.HexToHash("0x5555") + vMultiMid = common.HexToHash("0x5556") + vMultiFinal = common.HexToHash("0x55570000bbbb") + ) + // Storage at pivot A. 
+ slotsA := map[common.Hash]common.Hash{ + slotKeep: vKeep, + slotOver: vOver0, + slotZero: vZero0, + slotMultiTx: vMulti0, + } + // Expected storage at pivot A+1 after applying the BAL writes below. + slotsB := map[common.Hash]common.Hash{ + slotKeep: vKeep, // unchanged + slotOver: vOver1, // overwritten + slotNew: vNew, // newly written + slotMultiTx: vMultiFinal, // post-block (highest-tx) value wins + // slotZero deleted + } + contractTmpl := types.StateAccount{ + Nonce: 7, + Balance: uint256.NewInt(123456), + CodeHash: types.EmptyCodeHash[:], + } + + // Storage-less filler accounts, identical in A and A+1. + _, _, plain, _ := makeAccountTrieWithAddresses(20, scheme) + + // Build the state at pivot A (served by the seed peer) and the expected + // state at pivot A+1 (only its root is needed). + accTrieA, accElemsA, stTrieA, stElemsA, rootA := makeStateWithStorageContract(scheme, plain, contractAddr, contractTmpl, slotsA) + _, _, _, _, rootB := makeStateWithStorageContract(scheme, plain, contractAddr, contractTmpl, slotsB) + if rootA == rootB { + t.Fatal("test bug: pivot A and A+1 must have different state roots") + } + + // Build the A+1 BAL describing the storage mutations. + cb := bal.NewConstructionBlockAccessList() + cb.StorageWrite(0, contractAddr, slotOver, vOver1) // overwrite + cb.StorageWrite(0, contractAddr, slotZero, common.Hash{}) // write zero -> delete + cb.StorageWrite(0, contractAddr, slotNew, vNew) // new non-zero + cb.StorageWrite(0, contractAddr, slotMultiTx, vMultiMid) // tx 0 + cb.StorageWrite(2, contractAddr, slotMultiTx, vMultiFinal) // tx 2 (post-block) + var balBuf bytes.Buffer + if err := cb.EncodeRLP(&balBuf); err != nil { + t.Fatal(err) + } + var decodedBAL bal.BlockAccessList + if err := rlp.DecodeBytes(balBuf.Bytes(), &decodedBAL); err != nil { + t.Fatal(err) + } + balHash := decodedBAL.Hash() + + // Chain headers. 
The pivot-A header is the same object passed to the first + // Sync, so the follow-up Sync's reorg check sees A as still-canonical and + // runs catchUp instead of resetting. The A+1 header carries the BAL hash + // (verified during catch-up) and the expected post-catch-up state root + // (verified by the trie rebuild). + db := rawdb.NewMemoryDatabase() + numA := uint64(128) + emptyH := common.Hash{} + zero := uint64(0) + hdrA := &types.Header{ + Number: new(big.Int).SetUint64(numA), Root: rootA, Difficulty: common.Big0, + BaseFee: common.Big0, WithdrawalsHash: &emptyH, + BlobGasUsed: &zero, ExcessBlobGas: &zero, + ParentBeaconRoot: &emptyH, RequestsHash: &emptyH, + } + rawdb.WriteHeader(db, hdrA) + rawdb.WriteCanonicalHash(db, hdrA.Hash(), numA) + + hdrB := &types.Header{ + Number: new(big.Int).SetUint64(numA + 1), Root: rootB, Difficulty: common.Big0, + BaseFee: common.Big0, WithdrawalsHash: &emptyH, + BlobGasUsed: &zero, ExcessBlobGas: &zero, + ParentBeaconRoot: &emptyH, RequestsHash: &emptyH, + BlockAccessListHash: &balHash, + } + rawdb.WriteHeader(db, hdrB) + rawdb.WriteCanonicalHash(db, hdrB.Hash(), numA+1) + + // Sync 1: full flat-state download + trie rebuild against pivot A. + { + var ( + once sync.Once + cancel = make(chan struct{}) + term = func() { once.Do(func() { close(cancel) }) } + ) + syncer := newSyncerV2(db, scheme) + src := newTestPeerV2("seed", t, term) + src.accountTrie = accTrieA.Copy() + src.accountValues = accElemsA + src.setStorageTries(map[common.Hash]*trie.Trie{contractHash: stTrieA}) + src.storageValues = map[common.Hash][]*kv{contractHash: stElemsA} + syncer.Register(src) + src.remote = syncer + done := checkStall(t, term) + if err := syncer.Sync(hdrA, cancel); err != nil { + t.Fatalf("pivot A sync failed: %v", err) + } + close(done) + } + // Sanity: the rebuilt trie for pivot A is complete and matches rootA. This + // also confirms the test fixture itself is internally consistent. 
+ verifyTrie(scheme, db, rootA, t) + + // Sync 2: the pivot moves to A+1, exercising the BAL catch-up path. + { + var ( + once sync.Once + cancel = make(chan struct{}) + term = func() { once.Do(func() { close(cancel) }) } + ) + syncer := newSyncerV2(db, scheme) + src := newTestPeerV2("catchup", t, term) + // Pivot A is fully synced, so no download tasks remain; the peer only + // needs to serve the A+1 BAL. The trie data is provided defensively in + // case a stray account request is issued. + src.accountTrie = accTrieA.Copy() + src.accountValues = accElemsA + src.accessLists = map[common.Hash]rlp.RawValue{hdrB.Hash(): balBuf.Bytes()} + syncer.Register(src) + src.remote = syncer + done := checkStall(t, term) + if err := syncer.Sync(hdrB, cancel); err != nil { + t.Fatalf("pivot A+1 catch-up sync failed: %v", err) + } + close(done) + } + + // A successful Sync already means GenerateTrie reproduced rootB from the + // BAL-updated flat state (it errors on root mismatch). Re-walk the trie as + // an independent confirmation that rootB is fully materialized. + verifyTrie(scheme, db, rootB, t) + + // Spot-check each storage mutation landed in the flat snapshot in the + // canonical encoding. + checkSlot := func(raw common.Hash, want common.Hash, present bool) { + t.Helper() + got := rawdb.ReadStorageSnapshot(db, contractHash, crypto.Keccak256Hash(raw[:])) + if !present { + if len(got) != 0 { + t.Errorf("slot %x: expected deletion, got %x", raw, got) + } + return + } + wantEnc, _ := rlp.EncodeToBytes(common.TrimLeftZeroes(want[:])) + if !bytes.Equal(got, wantEnc) { + t.Errorf("slot %x: got %x, want %x", raw, got, wantEnc) + } + } + checkSlot(slotKeep, vKeep, true) + checkSlot(slotOver, vOver1, true) + checkSlot(slotZero, common.Hash{}, false) + checkSlot(slotNew, vNew, true) + checkSlot(slotMultiTx, vMultiFinal, true) +}