From ad68ce261b848726daac90efd93f26d673b167ad Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Wed, 17 Jun 2026 09:55:36 +0800 Subject: [PATCH] eth: reserve peer slot for usable snap peer (#35180) This PR improves the slot reservation logic in the context of snap/2. Geth has the mechanism to reserve roughly half the peer slots for peers supporting the snap protocol if snap syncing is needed by local node. With the context of snap/2, this mechanism should be changed that: we reserve the slot for the "usable snap peer", not blindly for peer with snap extension enabled (such as legacy snap/1, which can't serve the snap/2). --- eth/downloader/downloader.go | 12 ++++++++++++ eth/downloader/metrics.go | 4 ++++ eth/handler.go | 16 +++++++++++----- eth/peerset.go | 25 ++++++++++++++----------- 4 files changed, 41 insertions(+), 16 deletions(-) diff --git a/eth/downloader/downloader.go b/eth/downloader/downloader.go index af4c4bb1f6..cfa1c02e9a 100644 --- a/eth/downloader/downloader.go +++ b/eth/downloader/downloader.go @@ -1132,11 +1132,23 @@ func (d *Downloader) DeliverSnapPacket(peer *snap.Peer, packet snap.Packet) erro // snap/2 syncer skips snap/1-only peers, which cannot answer its BAL requests. func (d *Downloader) RegisterSnapPeer(p *snap.Peer) error { if p.Version() < d.snapSyncer.Version() { + // The peer speaks an older snap version than the active syncer needs + // (e.g. snap/1 while we sync via snap/2). We still serve it, but it + // cannot answer our requests, so it is not registered for syncing. + // Surface it so an operator can tell a stalled sync from a quiet one. + snapPeerSkipMeter.Mark(1) + log.Debug("Skipping snap peer below syncer version", "peer", p.ID(), "version", p.Version(), "required", d.snapSyncer.Version()) return nil } return d.snapSyncer.Register(p) } +// SnapSyncVersion returns the snap protocol version of the active state syncer. +// Peers negotiating a lower version cannot serve its requests. +func (d *Downloader) SnapSyncVersion() uint { + return d.snapSyncer.Version() +} + // UnregisterSnapPeer removes a snap peer from the active state syncer. It mirrors // RegisterSnapPeer's version gate: a peer below the active syncer's version was // never registered, so there is nothing to remove. diff --git a/eth/downloader/metrics.go b/eth/downloader/metrics.go index bfe80ddbf1..caa8dfb9ee 100644 --- a/eth/downloader/metrics.go +++ b/eth/downloader/metrics.go @@ -38,4 +38,8 @@ var ( receiptTimeoutMeter = metrics.NewRegisteredMeter("eth/downloader/receipts/timeout", nil) throttleCounter = metrics.NewRegisteredCounter("eth/downloader/throttle", nil) + + // snapPeerSkipMeter tracks snap peers skipped by the state syncer because + // they negotiated a version below the one the syncer requires. + snapPeerSkipMeter = metrics.NewRegisteredMeter("eth/downloader/snap/peerskip", nil) ) diff --git a/eth/handler.go b/eth/handler.go index 3c5e122c80..2028517cec 100644 --- a/eth/handler.go +++ b/eth/handler.go @@ -245,11 +245,17 @@ func (h *handler) runEthPeer(peer *eth.Peer, handler eth.Handler) error { } reject := false // reserved peer slots if h.downloader.ConfigSyncMode() == ethconfig.SnapSync { - if snap == nil { - // If we are running snap-sync, we want to reserve roughly half the peer - // slots for peers supporting the snap protocol. - // The logic here is; we only allow up to 5 more non-snap peers than snap-peers. - if all, snp := h.peers.len(), h.peers.snapLen(); all-snp > snp+5 { + // A peer is useful to the active state syncer only if it offers the snap + // extension at (or above) the syncer's version. Non-snap peers AND peers + // stuck on an older snap version (e.g. snap/1 while we sync via snap/2) + // are both "non-usable": the node still serves them, but they must not be + // allowed to fill the slots reserved for peers that can serve the sync. + minVersion := h.downloader.SnapSyncVersion() + usable := snap != nil && snap.Version() >= minVersion + if !usable { + // Reserve roughly half the slots for usable peers: only allow up to 5 + // more non-usable peers (non-snap or below-version snap) than usable ones. + if all, snp := h.peers.len(), h.peers.snapLen(minVersion); all-snp > snp+5 { reject = true } } diff --git a/eth/peerset.go b/eth/peerset.go index e6f623f90c..fa946e8b17 100644 --- a/eth/peerset.go +++ b/eth/peerset.go @@ -49,8 +49,7 @@ var ( // peerSet represents the collection of active peers currently participating in // the `eth` protocol, with or without the `snap` extension. type peerSet struct { - peers map[string]*ethPeer // Peers connected on the `eth` protocol - snapPeers int // Number of `snap` compatible peers for connection prioritization + peers map[string]*ethPeer // Peers connected on the `eth` protocol snapWait map[string]chan *snap.Peer // Peers connected on `eth` waiting for their snap extension snapPend map[string]*snap.Peer // Peers connected on the `snap` protocol, but not yet on `eth` @@ -161,7 +160,6 @@ func (ps *peerSet) registerPeer(peer *eth.Peer, ext *snap.Peer) error { } if ext != nil { eth.snapExt = &snapPeer{ext} - ps.snapPeers++ } ps.peers[id] = eth return nil @@ -173,14 +171,10 @@ func (ps *peerSet) unregisterPeer(id string) error { ps.lock.Lock() defer ps.lock.Unlock() - peer, ok := ps.peers[id] - if !ok { + if _, ok := ps.peers[id]; !ok { return errPeerNotRegistered } delete(ps.peers, id) - if peer.snapExt != nil { - ps.snapPeers-- - } return nil } @@ -210,12 +204,21 @@ func (ps *peerSet) len() int { return len(ps.peers) } -// snapLen returns if the current number of `snap` peers in the set. -func (ps *peerSet) snapLen() int { +// snapLen returns the number of `snap` peers whose negotiated version is at +// least minVersion — i.e. peers usable by a state syncer running that version. +// Lower-version snap peers (which the node still serves but cannot sync from) +// are excluded, so they don't get counted toward the reserved snap-peer slots. +func (ps *peerSet) snapLen(minVersion uint) int { ps.lock.RLock() defer ps.lock.RUnlock() - return ps.snapPeers + var n int + for _, p := range ps.peers { + if p.snapExt != nil && p.snapExt.Version() >= minVersion { + n++ + } + } + return n } // close disconnects all peers.