From c2e1785a48f12f1a4f8d533fc2ed5468b6241e92 Mon Sep 17 00:00:00 2001
From: CPerezz <37264926+CPerezz@users.noreply.github.com>
Date: Tue, 24 Feb 2026 02:14:11 +0100
Subject: [PATCH] eth/protocols/snap: restore peers to idle pool on request
 revert (#33790)

All five `revert*Request` functions (account, bytecode, storage,
trienode heal, bytecode heal) remove the request from the tracked set
but never restore the peer to its corresponding idle pool. When a
request times out and no response arrives, the peer is permanently lost
from the idle pool, preventing new work from being assigned to it.

In normal operation mode (snap-sync full state) this bug is masked by
pivot movement (which resets idle pools via new Sync() cycles every ~15
minutes) and peer churn (reconnections re-add peers via Register()).
However in scenarios like the one I have running my (partial-stateful
node)[https://github.com/ethereum/go-ethereum/pull/33764] with
long-running sync cycles and few peers, all peers can eventually leak
out of the idle pools, stalling sync entirely.

Fix: after deleting from the request map, restore the peer to its idle
pool if it is still registered (guards against the peer-drop path where
Unregister already removed the peer). This mirrors the pattern used in
all five On* response handlers.


This only seems to manifest in peer-thirstly scenarios as where I find
myself when testing snapsync for the partial-statefull node).
Still, thought was at least good to raise this point. Unsure if required
to discuss or not
---
 eth/protocols/snap/sync.go | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go
index 08e85c896a..841bfb446e 100644
--- a/eth/protocols/snap/sync.go
+++ b/eth/protocols/snap/sync.go
@@ -1699,9 +1699,13 @@ func (s *Syncer) revertAccountRequest(req *accountRequest) {
 	}
 	close(req.stale)
 
-	// Remove the request from the tracked set
+	// Remove the request from the tracked set and restore the peer to the
+	// idle pool so it can be reassigned work (skip if peer already left).
 	s.lock.Lock()
 	delete(s.accountReqs, req.id)
+	if _, ok := s.peers[req.peer]; ok {
+		s.accountIdlers[req.peer] = struct{}{}
+	}
 	s.lock.Unlock()
 
 	// If there's a timeout timer still running, abort it and mark the account
@@ -1740,9 +1744,13 @@ func (s *Syncer) revertBytecodeRequest(req *bytecodeRequest) {
 	}
 	close(req.stale)
 
-	// Remove the request from the tracked set
+	// Remove the request from the tracked set and restore the peer to the
+	// idle pool so it can be reassigned work (skip if peer already left).
 	s.lock.Lock()
 	delete(s.bytecodeReqs, req.id)
+	if _, ok := s.peers[req.peer]; ok {
+		s.bytecodeIdlers[req.peer] = struct{}{}
+	}
 	s.lock.Unlock()
 
 	// If there's a timeout timer still running, abort it and mark the code
@@ -1781,9 +1789,13 @@ func (s *Syncer) revertStorageRequest(req *storageRequest) {
 	}
 	close(req.stale)
 
-	// Remove the request from the tracked set
+	// Remove the request from the tracked set and restore the peer to the
+	// idle pool so it can be reassigned work (skip if peer already left).
 	s.lock.Lock()
 	delete(s.storageReqs, req.id)
+	if _, ok := s.peers[req.peer]; ok {
+		s.storageIdlers[req.peer] = struct{}{}
+	}
 	s.lock.Unlock()
 
 	// If there's a timeout timer still running, abort it and mark the storage
@@ -1826,9 +1838,13 @@ func (s *Syncer) revertTrienodeHealRequest(req *trienodeHealRequest) {
 	}
 	close(req.stale)
 
-	// Remove the request from the tracked set
+	// Remove the request from the tracked set and restore the peer to the
+	// idle pool so it can be reassigned work (skip if peer already left).
 	s.lock.Lock()
 	delete(s.trienodeHealReqs, req.id)
+	if _, ok := s.peers[req.peer]; ok {
+		s.trienodeHealIdlers[req.peer] = struct{}{}
+	}
 	s.lock.Unlock()
 
 	// If there's a timeout timer still running, abort it and mark the trie node
@@ -1867,9 +1883,13 @@ func (s *Syncer) revertBytecodeHealRequest(req *bytecodeHealRequest) {
 	}
 	close(req.stale)
 
-	// Remove the request from the tracked set
+	// Remove the request from the tracked set and restore the peer to the
+	// idle pool so it can be reassigned work (skip if peer already left).
 	s.lock.Lock()
 	delete(s.bytecodeHealReqs, req.id)
+	if _, ok := s.peers[req.peer]; ok {
+		s.bytecodeHealIdlers[req.peer] = struct{}{}
+	}
 	s.lock.Unlock()
 
 	// If there's a timeout timer still running, abort it and mark the code