From 5a918be50d8917196158514e8f688459e526dc10 Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Fri, 10 Apr 2026 08:23:30 +0200 Subject: [PATCH 01/38] eth: protect high-value peers from random dropping based on inclusion stats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dropper periodically disconnects random peers to create churn. This was blind to peer quality. Add inclusion-based peer protection using two categories: 1. Total inclusions: protects peers with the highest cumulative count of delivered txs that were included on chain 2. Recent inclusions (EMA): protects peers with the best recent inclusion rate, giving newly productive peers faster protection Each category independently protects the top 10% of inbound and top 10% of dialed peers. The union of both sets is protected. Only peers with positive scores qualify. The dropper defines its own PeerInclusionStats struct and callback type (getPeerInclusionStatsFunc) so any stats provider (e.g. a transaction tracker) can plug in without a package dependency. The callback is nil by default (protection disabled until wired). The protectionCategories slice is designed for easy extension — adding a new category requires only appending a struct with a name, scoring function, and protection fraction. --- eth/backend.go | 2 +- eth/dropper.go | 136 ++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 123 insertions(+), 15 deletions(-) diff --git a/eth/backend.go b/eth/backend.go index e9bea59734..98aab2de00 100644 --- a/eth/backend.go +++ b/eth/backend.go @@ -460,7 +460,7 @@ func (s *Ethereum) Start() error { s.handler.Start(s.p2pServer.MaxPeers) // Start the connection manager - s.dropper.Start(s.p2pServer, func() bool { return !s.Synced() }) + s.dropper.Start(s.p2pServer, func() bool { return !s.Synced() }, nil) // start log indexer s.filterMaps.Start() diff --git a/eth/dropper.go b/eth/dropper.go index dada5d07c0..31da9301d8 100644 --- a/eth/dropper.go +++ b/eth/dropper.go @@ -19,6 +19,7 @@ package eth import ( mrand "math/rand" "slices" + "sort" "sync" "time" @@ -40,6 +41,10 @@ const ( // dropping when no more peers can be added. Larger numbers result in more // aggressive drop behavior. peerDropThreshold = 0 + // Fraction of inbound/dialed peers to protect based on inclusion stats. + // The top inclusionProtectionFrac of each category (by score) are + // shielded from random dropping. 0.1 = top 10%. + inclusionProtectionFrac = 0.1 ) var ( @@ -47,8 +52,37 @@ var ( droppedInbound = metrics.NewRegisteredMeter("eth/dropper/inbound", nil) // droppedOutbound is the number of outbound peers dropped droppedOutbound = metrics.NewRegisteredMeter("eth/dropper/outbound", nil) + // droppedProtected counts times a drop was skipped because all + // droppable candidates were protected by inclusion stats. + droppedProtected = metrics.NewRegisteredMeter("eth/dropper/protected", nil) ) +// PeerInclusionStats holds the per-peer inclusion data needed by the dropper +// to decide which peers to protect. Any stats provider (e.g. txtracker) can +// implement getPeerInclusionStatsFunc by returning this struct per peer ID. +type PeerInclusionStats struct { + Included int64 // Cumulative on-chain inclusions attributed to this peer + RecentIncluded float64 // EMA of per-block inclusions (0 if not tracked) +} + +// Callback type to get per-peer inclusion statistics. +type getPeerInclusionStatsFunc func() map[string]PeerInclusionStats + +// protectionCategory defines a peer scoring function and the fraction of peers +// to protect per inbound/dialed category. Multiple categories are unioned. +type protectionCategory struct { + name string + score func(PeerInclusionStats) float64 + frac float64 // fraction of max peers to protect (0.0–1.0) +} + +// protectionCategories is the list of protection criteria. Each category +// independently selects its top-N peers per pool; the union is protected. +var protectionCategories = []protectionCategory{ + {"total-included", func(s PeerInclusionStats) float64 { return float64(s.Included) }, inclusionProtectionFrac}, + {"recent-included", func(s PeerInclusionStats) float64 { return s.RecentIncluded }, inclusionProtectionFrac}, +} + // dropper monitors the state of the peer pool and makes changes as follows: // - during sync the Downloader handles peer connections, so dropper is disabled // - if not syncing and the peer count is close to the limit, it drops peers @@ -59,6 +93,7 @@ type dropper struct { maxInboundPeers int // maximum number of inbound peers peersFunc getPeersFunc syncingFunc getSyncingFunc + peerStatsFunc getPeerInclusionStatsFunc // optional: inclusion stats for protection // peerDropTimer introduces churn if we are close to limit capacity. // We handle Dialed and Inbound connections separately @@ -88,10 +123,12 @@ func newDropper(maxDialPeers, maxInboundPeers int) *dropper { return cm } -// Start the dropper. -func (cm *dropper) Start(srv *p2p.Server, syncingFunc getSyncingFunc) { +// Start the dropper. peerStatsFunc is optional (nil disables inclusion +// protection). +func (cm *dropper) Start(srv *p2p.Server, syncingFunc getSyncingFunc, peerStatsFunc getPeerInclusionStatsFunc) { cm.peersFunc = srv.Peers cm.syncingFunc = syncingFunc + cm.peerStatsFunc = peerStatsFunc cm.wg.Add(1) go cm.loop() } @@ -125,19 +162,90 @@ func (cm *dropper) dropRandomPeer() bool { } droppable := slices.DeleteFunc(peers, selectDoNotDrop) - if len(droppable) > 0 { - p := droppable[mrand.Intn(len(droppable))] - log.Debug("Dropping random peer", "inbound", p.Inbound(), - "id", p.ID(), "duration", common.PrettyDuration(p.Lifetime()), "peercountbefore", len(peers)) - p.Disconnect(p2p.DiscUselessPeer) - if p.Inbound() { - droppedInbound.Mark(1) - } else { - droppedOutbound.Mark(1) - } - return true + if len(droppable) == 0 { + return false } - return false + // Protect peers with the highest inclusion stats. + if cm.peerStatsFunc != nil { + droppable = cm.filterProtectedPeers(droppable) + if len(droppable) == 0 { + droppedProtected.Mark(1) + return false + } + } + p := droppable[mrand.Intn(len(droppable))] + log.Debug("Dropping random peer", "inbound", p.Inbound(), + "id", p.ID(), "duration", common.PrettyDuration(p.Lifetime()), "peercountbefore", len(peers)) + p.Disconnect(p2p.DiscUselessPeer) + if p.Inbound() { + droppedInbound.Mark(1) + } else { + droppedOutbound.Mark(1) + } + return true +} + +// filterProtectedPeers removes peers from the droppable list that are +// protected by any of the protection categories. Each category independently +// selects the top-N peers per inbound/dialed pool by score; the union of all +// selections is protected. +func (cm *dropper) filterProtectedPeers(droppable []*p2p.Peer) []*p2p.Peer { + stats := cm.peerStatsFunc() + if len(stats) == 0 { + return droppable + } + type peerWithStats struct { + peer *p2p.Peer + s PeerInclusionStats + } + var inbound, dialed []peerWithStats + for _, p := range droppable { + id := p.ID().String() + entry := peerWithStats{p, stats[id]} + if p.Inbound() { + inbound = append(inbound, entry) + } else { + dialed = append(dialed, entry) + } + } + protectedSet := make(map[*p2p.Peer]struct{}) + + protectTopN := func(entries []peerWithStats, maxPeers int, cat protectionCategory) { + n := int(float64(maxPeers) * cat.frac) + if n == 0 || len(entries) == 0 { + return + } + sort.Slice(entries, func(i, j int) bool { + return cat.score(entries[i].s) > cat.score(entries[j].s) + }) + for i := 0; i < n && i < len(entries); i++ { + if cat.score(entries[i].s) > 0 { + protectedSet[entries[i].peer] = struct{}{} + } + } + } + for _, cat := range protectionCategories { + inCopy := make([]peerWithStats, len(inbound)) + copy(inCopy, inbound) + dialCopy := make([]peerWithStats, len(dialed)) + copy(dialCopy, dialed) + + protectTopN(inCopy, cm.maxInboundPeers, cat) + protectTopN(dialCopy, cm.maxDialPeers, cat) + } + if len(protectedSet) == 0 { + return droppable + } + log.Debug("Protecting high-value peers from drop", + "protected", len(protectedSet), "droppable", len(droppable)) + + result := make([]*p2p.Peer, 0, len(droppable)) + for _, p := range droppable { + if _, ok := protectedSet[p]; !ok { + result = append(result, p) + } + } + return result } // randomDuration generates a random duration between min and max. From 9f2575efeb7fbc4e6d1b1d22004081d396ac1ab2 Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Fri, 10 Apr 2026 08:30:21 +0200 Subject: [PATCH 02/38] eth/txtracker: add minimal tracker as inclusion stats provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Minimal txtracker that records which peer delivered each transaction and credits peers when their transactions appear on chain. Provides the PeerInclusionStats needed by the dropper's protection logic. Design: - NotifyReceived(peer, txs): records deliverer per tx hash (called from handler_eth.go when tx bodies arrive via P2P) - Subscribes to ChainHeadEvent, fetches block txs, credits the delivering peer for each included tx - Per-peer EMA of recent inclusions (alpha=0.05), updated every block - LRU eviction at 262K entries to bound memory - Mutex-based (not channel-based) for simplicity — the hot path (NotifyReceived) is a fast map insert Wired into the dropper via an adapter callback in backend.go that converts txtracker.PeerStats to the dropper's PeerInclusionStats. --- eth/backend.go | 15 +++- eth/handler.go | 3 + eth/handler_eth.go | 2 + eth/txtracker/tracker.go | 165 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 183 insertions(+), 2 deletions(-) create mode 100644 eth/txtracker/tracker.go diff --git a/eth/backend.go b/eth/backend.go index 98aab2de00..5dcc2f03f9 100644 --- a/eth/backend.go +++ b/eth/backend.go @@ -459,8 +459,18 @@ func (s *Ethereum) Start() error { // Start the networking layer s.handler.Start(s.p2pServer.MaxPeers) - // Start the connection manager - s.dropper.Start(s.p2pServer, func() bool { return !s.Synced() }, nil) + // Start the transaction tracker (records tx deliveries, credits peer inclusions). + s.handler.txTracker.Start(s.blockchain) + + // Start the connection manager with inclusion-based peer protection. + s.dropper.Start(s.p2pServer, func() bool { return !s.Synced() }, func() map[string]PeerInclusionStats { + stats := s.handler.txTracker.GetAllPeerStats() + result := make(map[string]PeerInclusionStats, len(stats)) + for id, ps := range stats { + result[id] = PeerInclusionStats{Included: ps.Included, RecentIncluded: ps.RecentIncluded} + } + return result + }) // start log indexer s.filterMaps.Start() @@ -584,6 +594,7 @@ func (s *Ethereum) Stop() error { // Stop all the peer-related stuff first. s.discmix.Close() s.dropper.Stop() + s.handler.txTracker.Stop() s.handler.Stop() // Then stop everything else. diff --git a/eth/handler.go b/eth/handler.go index 27b5e60697..90d74a71bf 100644 --- a/eth/handler.go +++ b/eth/handler.go @@ -36,6 +36,7 @@ import ( "github.com/ethereum/go-ethereum/eth/downloader" "github.com/ethereum/go-ethereum/eth/ethconfig" "github.com/ethereum/go-ethereum/eth/fetcher" + "github.com/ethereum/go-ethereum/eth/txtracker" "github.com/ethereum/go-ethereum/eth/protocols/eth" "github.com/ethereum/go-ethereum/eth/protocols/snap" "github.com/ethereum/go-ethereum/ethdb" @@ -123,6 +124,7 @@ type handler struct { downloader *downloader.Downloader txFetcher *fetcher.TxFetcher + txTracker *txtracker.Tracker peers *peerSet txBroadcastKey [16]byte @@ -189,6 +191,7 @@ func newHandler(config *handlerConfig) (*handler, error) { return nil } h.txFetcher = fetcher.NewTxFetcher(h.chain, validateMeta, addTxs, fetchTx, h.removePeer) + h.txTracker = txtracker.New() return h, nil } diff --git a/eth/handler_eth.go b/eth/handler_eth.go index 8704a86af4..8974e4b8ab 100644 --- a/eth/handler_eth.go +++ b/eth/handler_eth.go @@ -66,6 +66,7 @@ func (h *ethHandler) Handle(peer *eth.Peer, packet eth.Packet) error { if err != nil { return fmt.Errorf("Transactions: %v", err) } + h.txTracker.NotifyReceived(peer.ID(), txs) if err := handleTransactions(peer, txs, true); err != nil { return fmt.Errorf("Transactions: %v", err) } @@ -76,6 +77,7 @@ func (h *ethHandler) Handle(peer *eth.Peer, packet eth.Packet) error { if err != nil { return fmt.Errorf("PooledTransactions: %v", err) } + h.txTracker.NotifyReceived(peer.ID(), txs) if err := handleTransactions(peer, txs, false); err != nil { return fmt.Errorf("PooledTransactions: %v", err) } diff --git a/eth/txtracker/tracker.go b/eth/txtracker/tracker.go new file mode 100644 index 0000000000..8383970810 --- /dev/null +++ b/eth/txtracker/tracker.go @@ -0,0 +1,165 @@ +// Package txtracker provides minimal per-peer transaction inclusion tracking. +// It records which peer delivered each transaction and credits peers when +// their delivered transactions are included on chain. +package txtracker + +import ( + "sync" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/event" + "github.com/ethereum/go-ethereum/log" +) + +const ( + // Maximum number of tx→deliverer mappings to retain. + maxTracked = 262144 + // EMA smoothing factor for per-block inclusion rate. + emaAlpha = 0.05 +) + +// PeerStats holds the per-peer inclusion data. +type PeerStats struct { + Included int64 // Cumulative on-chain inclusions attributed to this peer + RecentIncluded float64 // EMA of per-block inclusions +} + +// Chain is the blockchain interface needed by the tracker. +type Chain interface { + SubscribeChainHeadEvent(ch chan<- core.ChainHeadEvent) event.Subscription + GetBlockByNumber(number uint64) *types.Block +} + +type peerStats struct { + included int64 + recentIncluded float64 +} + +// Tracker records which peer delivered each transaction and credits peers +// when their transactions appear on chain. +type Tracker struct { + mu sync.Mutex + txs map[common.Hash]string // hash → deliverer peer ID + peers map[string]*peerStats + order []common.Hash // insertion order for LRU eviction + + chain Chain + headCh chan core.ChainHeadEvent + sub event.Subscription + + quit chan struct{} + wg sync.WaitGroup +} + +// New creates a new tracker. +func New() *Tracker { + return &Tracker{ + txs: make(map[common.Hash]string), + peers: make(map[string]*peerStats), + quit: make(chan struct{}), + } +} + +// Start begins listening for chain head events. +func (t *Tracker) Start(chain Chain) { + t.chain = chain + t.headCh = make(chan core.ChainHeadEvent, 128) + t.sub = chain.SubscribeChainHeadEvent(t.headCh) + t.wg.Add(1) + go t.loop() +} + +// Stop shuts down the tracker. +func (t *Tracker) Stop() { + t.sub.Unsubscribe() + close(t.quit) + t.wg.Wait() +} + +// NotifyReceived records that a peer delivered transaction bodies. +// Safe to call from any goroutine. +func (t *Tracker) NotifyReceived(peer string, txs []*types.Transaction) { + t.mu.Lock() + defer t.mu.Unlock() + + for _, tx := range txs { + hash := tx.Hash() + if _, ok := t.txs[hash]; ok { + continue // already tracked, keep first deliverer + } + t.txs[hash] = peer + t.order = append(t.order, hash) + } + // Evict oldest entries if over capacity. + for len(t.txs) > maxTracked { + oldest := t.order[0] + t.order = t.order[1:] + delete(t.txs, oldest) + } +} + +// GetAllPeerStats returns a snapshot of per-peer inclusion statistics. +// Safe to call from any goroutine. +func (t *Tracker) GetAllPeerStats() map[string]PeerStats { + t.mu.Lock() + defer t.mu.Unlock() + + result := make(map[string]PeerStats, len(t.peers)) + for id, ps := range t.peers { + result[id] = PeerStats{ + Included: ps.included, + RecentIncluded: ps.recentIncluded, + } + } + return result +} + +func (t *Tracker) loop() { + defer t.wg.Done() + + for { + select { + case ev := <-t.headCh: + t.handleChainHead(ev) + case <-t.sub.Err(): + return + case <-t.quit: + return + } + } +} + +func (t *Tracker) handleChainHead(ev core.ChainHeadEvent) { + block := t.chain.GetBlockByNumber(ev.Header.Number.Uint64()) + if block == nil { + return + } + t.mu.Lock() + defer t.mu.Unlock() + + // Credit delivering peers for each included transaction. + blockIncl := make(map[string]int) + for _, tx := range block.Transactions() { + hash := tx.Hash() + peer, ok := t.txs[hash] + if !ok || peer == "" { + continue + } + ps := t.peers[peer] + if ps == nil { + ps = &peerStats{} + t.peers[peer] = ps + } + ps.included++ + blockIncl[peer]++ + } + // Update per-peer recent-inclusion EMA for all tracked peers. + for peer, ps := range t.peers { + ps.recentIncluded = (1-emaAlpha)*ps.recentIncluded + emaAlpha*float64(blockIncl[peer]) + } + if len(blockIncl) > 0 { + log.Trace("Credited peers for block inclusions", "block", ev.Header.Number, "peers", len(blockIncl)) + } +} From 98ffc7bd37fc130367823e8c7ddeb9afee415f36 Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Fri, 10 Apr 2026 08:56:32 +0200 Subject: [PATCH 03/38] eth: use finalized count for total protection, keep EMA on inclusions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the long-term protection category from total inclusions to total finalized inclusions. Finalized txs are harder to game (require actual block finality, not just inclusion) and represent confirmed on-chain value. The recent-inclusion EMA stays on chain head inclusions for responsiveness — a peer delivering txs that appear in the latest blocks gets quick protection without waiting for finalization. The tracker now checks CurrentFinalBlock() on each chain head event and credits delivering peers for all newly finalized blocks since the last check. --- eth/backend.go | 2 +- eth/dropper.go | 4 +-- eth/txtracker/tracker.go | 78 ++++++++++++++++++++++++++++------------ 3 files changed, 58 insertions(+), 26 deletions(-) diff --git a/eth/backend.go b/eth/backend.go index 5dcc2f03f9..d5ec34e21d 100644 --- a/eth/backend.go +++ b/eth/backend.go @@ -467,7 +467,7 @@ func (s *Ethereum) Start() error { stats := s.handler.txTracker.GetAllPeerStats() result := make(map[string]PeerInclusionStats, len(stats)) for id, ps := range stats { - result[id] = PeerInclusionStats{Included: ps.Included, RecentIncluded: ps.RecentIncluded} + result[id] = PeerInclusionStats{Finalized: ps.Finalized, RecentIncluded: ps.RecentIncluded} } return result }) diff --git a/eth/dropper.go b/eth/dropper.go index 31da9301d8..e8710c3ec7 100644 --- a/eth/dropper.go +++ b/eth/dropper.go @@ -61,7 +61,7 @@ var ( // to decide which peers to protect. Any stats provider (e.g. txtracker) can // implement getPeerInclusionStatsFunc by returning this struct per peer ID. type PeerInclusionStats struct { - Included int64 // Cumulative on-chain inclusions attributed to this peer + Finalized int64 // Cumulative finalized inclusions attributed to this peer RecentIncluded float64 // EMA of per-block inclusions (0 if not tracked) } @@ -79,7 +79,7 @@ type protectionCategory struct { // protectionCategories is the list of protection criteria. Each category // independently selects its top-N peers per pool; the union is protected. var protectionCategories = []protectionCategory{ - {"total-included", func(s PeerInclusionStats) float64 { return float64(s.Included) }, inclusionProtectionFrac}, + {"total-finalized", func(s PeerInclusionStats) float64 { return float64(s.Finalized) }, inclusionProtectionFrac}, {"recent-included", func(s PeerInclusionStats) float64 { return s.RecentIncluded }, inclusionProtectionFrac}, } diff --git a/eth/txtracker/tracker.go b/eth/txtracker/tracker.go index 8383970810..5fc479a49c 100644 --- a/eth/txtracker/tracker.go +++ b/eth/txtracker/tracker.go @@ -22,18 +22,19 @@ const ( // PeerStats holds the per-peer inclusion data. type PeerStats struct { - Included int64 // Cumulative on-chain inclusions attributed to this peer - RecentIncluded float64 // EMA of per-block inclusions + Finalized int64 // Cumulative finalized inclusions attributed to this peer + RecentIncluded float64 // EMA of per-block inclusions (at chain head time) } // Chain is the blockchain interface needed by the tracker. type Chain interface { SubscribeChainHeadEvent(ch chan<- core.ChainHeadEvent) event.Subscription GetBlockByNumber(number uint64) *types.Block + CurrentFinalBlock() *types.Header } type peerStats struct { - included int64 + finalized int64 recentIncluded float64 } @@ -45,9 +46,10 @@ type Tracker struct { peers map[string]*peerStats order []common.Hash // insertion order for LRU eviction - chain Chain - headCh chan core.ChainHeadEvent - sub event.Subscription + chain Chain + lastFinalNum uint64 // last finalized block number processed + headCh chan core.ChainHeadEvent + sub event.Subscription quit chan struct{} wg sync.WaitGroup @@ -109,7 +111,7 @@ func (t *Tracker) GetAllPeerStats() map[string]PeerStats { result := make(map[string]PeerStats, len(t.peers)) for id, ps := range t.peers { result[id] = PeerStats{ - Included: ps.included, + Finalized: ps.finalized, RecentIncluded: ps.recentIncluded, } } @@ -132,6 +134,7 @@ func (t *Tracker) loop() { } func (t *Tracker) handleChainHead(ev core.ChainHeadEvent) { + // Update recent-inclusion EMA from the new head block. block := t.chain.GetBlockByNumber(ev.Header.Number.Uint64()) if block == nil { return @@ -139,27 +142,56 @@ func (t *Tracker) handleChainHead(ev core.ChainHeadEvent) { t.mu.Lock() defer t.mu.Unlock() - // Credit delivering peers for each included transaction. + // Count per-peer inclusions in this block for the EMA. blockIncl := make(map[string]int) for _, tx := range block.Transactions() { - hash := tx.Hash() - peer, ok := t.txs[hash] - if !ok || peer == "" { - continue + if peer := t.txs[tx.Hash()]; peer != "" { + blockIncl[peer]++ } - ps := t.peers[peer] - if ps == nil { - ps = &peerStats{} - t.peers[peer] = ps - } - ps.included++ - blockIncl[peer]++ } - // Update per-peer recent-inclusion EMA for all tracked peers. + // Update EMA for all tracked peers (decay inactive ones). for peer, ps := range t.peers { ps.recentIncluded = (1-emaAlpha)*ps.recentIncluded + emaAlpha*float64(blockIncl[peer]) } - if len(blockIncl) > 0 { - log.Trace("Credited peers for block inclusions", "block", ev.Header.Number, "peers", len(blockIncl)) - } + // Check if the finalized block has advanced. + t.checkFinalization() +} + +// checkFinalization credits peers for transactions in newly finalized blocks. +// Must be called with t.mu held. +func (t *Tracker) checkFinalization() { + finalHeader := t.chain.CurrentFinalBlock() + if finalHeader == nil { + return + } + finalNum := finalHeader.Number.Uint64() + if finalNum <= t.lastFinalNum { + return + } + // Credit peers for all blocks from lastFinalNum+1 to finalNum. + var credited int + for num := t.lastFinalNum + 1; num <= finalNum; num++ { + block := t.chain.GetBlockByNumber(num) + if block == nil { + continue + } + for _, tx := range block.Transactions() { + peer := t.txs[tx.Hash()] + if peer == "" { + continue + } + ps := t.peers[peer] + if ps == nil { + ps = &peerStats{} + t.peers[peer] = ps + } + ps.finalized++ + credited++ + } + } + if credited > 0 { + log.Trace("Credited peers for finalized inclusions", + "from", t.lastFinalNum+1, "to", finalNum, "txs", credited) + } + t.lastFinalNum = finalNum } From 58556173f65bf86803f76ea5626dbaf8e9ad33cb Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Fri, 10 Apr 2026 08:59:09 +0200 Subject: [PATCH 04/38] eth: improve package and type documentation for txtracker and dropper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expand the txtracker package doc to describe the tracking flow (NotifyReceived → chain head → finalization → peer credit) and its role as stats provider for the dropper. Rewrite the dropper struct comment to document the full behavior including the inclusion-based peer protection: two scoring categories (total finalized + recent EMA), top 10% per pool, union of protected sets. --- eth/dropper.go | 18 +++++++++++++----- eth/txtracker/tracker.go | 11 +++++++++-- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/eth/dropper.go b/eth/dropper.go index e8710c3ec7..87c72cfa0f 100644 --- a/eth/dropper.go +++ b/eth/dropper.go @@ -83,11 +83,19 @@ var protectionCategories = []protectionCategory{ {"recent-included", func(s PeerInclusionStats) float64 { return s.RecentIncluded }, inclusionProtectionFrac}, } -// dropper monitors the state of the peer pool and makes changes as follows: -// - during sync the Downloader handles peer connections, so dropper is disabled -// - if not syncing and the peer count is close to the limit, it drops peers -// randomly every peerDropInterval to make space for new peers -// - peers are dropped separately from the inboud pool and from the dialed pool +// dropper monitors the state of the peer pool and introduces churn by +// periodically disconnecting a random peer to make room for new connections. +// +// Behavior: +// - During sync the Downloader handles peer connections, so dropper is disabled. +// - When not syncing and a peer category (inbound or dialed) is close to its +// limit, a random peer from that category is disconnected every 3–7 minutes. +// - Trusted, static, and recently connected peers are never dropped. +// - Peers that contribute the most on-chain transaction inclusions are +// protected from dropping. Two scoring categories are used (total finalized +// inclusions and recent inclusion EMA), each protecting the top 10% of +// inbound and dialed peers independently. The union of all protected sets +// is shielded; the drop target is chosen randomly from the remainder. type dropper struct { maxDialPeers int // maximum number of dialed peers maxInboundPeers int // maximum number of inbound peers diff --git a/eth/txtracker/tracker.go b/eth/txtracker/tracker.go index 5fc479a49c..aa962cdf32 100644 --- a/eth/txtracker/tracker.go +++ b/eth/txtracker/tracker.go @@ -1,6 +1,13 @@ // Package txtracker provides minimal per-peer transaction inclusion tracking. -// It records which peer delivered each transaction and credits peers when -// their delivered transactions are included on chain. +// +// It records which peer delivered each transaction body (via NotifyReceived) +// and monitors the chain for inclusion and finalization events. When a +// delivered transaction is finalized on chain, the delivering peer is +// credited. A per-block exponential moving average (EMA) of inclusions +// tracks recent peer productivity. +// +// The primary consumer is the peer dropper (eth/dropper.go), which uses +// these stats to protect high-value peers from random disconnection. package txtracker import ( From 8bfddee2ea4c45988e4af96ee434bf5e89295471 Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Fri, 10 Apr 2026 09:07:38 +0200 Subject: [PATCH 05/38] eth: add tests for txtracker and dropper peer protection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit txtracker tests (7 tests): - NotifyReceived: stats empty before chain events - InclusionEMA: EMA increases on inclusion, decays on empty blocks - Finalization: Finalized counter credited after finalization - MultiplePeers: each peer credited for own txs only - FirstDelivererWins: duplicate delivery ignored - NoFinalizationCredit: no credit without finalization - EMADecay: EMA approaches zero after 30 empty blocks dropper tests (6 tests): - FilterProtectedNoStats: nil stats → all droppable - FilterProtectedEmptyStats: empty map → all droppable - FilterProtectedTopPeer: top-scored peers removed from droppable - FilterProtectedZeroScore: zero scores → no protection - FilterProtectedOverlap: peer top in both categories → counted once - FilterProtectedAllProtected: all droppable protected → empty list Also fix: create peer entries during EMA update for peers with inclusions in the current block (previously only created during finalization, so EMA was not tracked before first finalization). --- eth/dropper_test.go | 125 ++++++++++++++++ eth/txtracker/tracker.go | 6 + eth/txtracker/tracker_test.go | 262 ++++++++++++++++++++++++++++++++++ 3 files changed, 393 insertions(+) create mode 100644 eth/dropper_test.go create mode 100644 eth/txtracker/tracker_test.go diff --git a/eth/dropper_test.go b/eth/dropper_test.go new file mode 100644 index 0000000000..8c08893ee3 --- /dev/null +++ b/eth/dropper_test.go @@ -0,0 +1,125 @@ +package eth + +import ( + "fmt" + "testing" + + "github.com/ethereum/go-ethereum/p2p" + "github.com/ethereum/go-ethereum/p2p/enode" +) + +func makePeers(n int) []*p2p.Peer { + peers := make([]*p2p.Peer, n) + for i := range peers { + id := enode.ID{byte(i)} + peers[i] = p2p.NewPeer(id, fmt.Sprintf("peer%d", i), nil) + } + return peers +} + +func TestFilterProtectedNoStats(t *testing.T) { + // When the stats func returns nil/empty, all peers remain droppable. + cm := &dropper{maxDialPeers: 20, maxInboundPeers: 30} + cm.peerStatsFunc = func() map[string]PeerInclusionStats { return nil } + + peers := makePeers(10) + result := cm.filterProtectedPeers(peers) + if len(result) != len(peers) { + t.Fatalf("expected all peers droppable with nil stats, got %d/%d", len(result), len(peers)) + } +} + +func TestFilterProtectedEmptyStats(t *testing.T) { + cm := &dropper{maxDialPeers: 20, maxInboundPeers: 30} + cm.peerStatsFunc = func() map[string]PeerInclusionStats { + return map[string]PeerInclusionStats{} + } + + peers := makePeers(10) + result := cm.filterProtectedPeers(peers) + if len(result) != len(peers) { + t.Fatalf("expected all peers droppable with empty stats, got %d/%d", len(result), len(peers)) + } +} + +func TestFilterProtectedTopPeer(t *testing.T) { + // 20 peers, maxDialPeers=20, 10% = 2 protected per category. + // NewPeer creates non-inbound peers, so all go to dialed bucket. + cm := &dropper{maxDialPeers: 20, maxInboundPeers: 30} + + peers := makePeers(20) + stats := make(map[string]PeerInclusionStats) + // Peer 0: top by Finalized + stats[peers[0].ID().String()] = PeerInclusionStats{Finalized: 100} + // Peer 1: top by RecentIncluded + stats[peers[1].ID().String()] = PeerInclusionStats{RecentIncluded: 5.0} + + cm.peerStatsFunc = func() map[string]PeerInclusionStats { return stats } + + result := cm.filterProtectedPeers(peers) + // 2 categories × 2 protected each = up to 4, but peers 0 and 1 are + // different so both should be removed. Other peers have zero scores. + protected := len(peers) - len(result) + if protected != 2 { + t.Fatalf("expected 2 protected peers, got %d", protected) + } + // Verify peers 0 and 1 are not in result. + for _, p := range result { + id := p.ID().String() + if id == peers[0].ID().String() || id == peers[1].ID().String() { + t.Fatalf("peer %s should be protected", id) + } + } +} + +func TestFilterProtectedZeroScore(t *testing.T) { + cm := &dropper{maxDialPeers: 20, maxInboundPeers: 30} + + peers := makePeers(10) + stats := make(map[string]PeerInclusionStats) + // All peers have zero stats. + for _, p := range peers { + stats[p.ID().String()] = PeerInclusionStats{} + } + cm.peerStatsFunc = func() map[string]PeerInclusionStats { return stats } + + result := cm.filterProtectedPeers(peers) + if len(result) != len(peers) { + t.Fatalf("expected no protection with zero scores, got %d protected", len(peers)-len(result)) + } +} + +func TestFilterProtectedOverlap(t *testing.T) { + // One peer is top in both categories — should only be removed once. + cm := &dropper{maxDialPeers: 20, maxInboundPeers: 30} + + peers := makePeers(20) + stats := make(map[string]PeerInclusionStats) + // Peer 0 is top in both. + stats[peers[0].ID().String()] = PeerInclusionStats{Finalized: 100, RecentIncluded: 5.0} + + cm.peerStatsFunc = func() map[string]PeerInclusionStats { return stats } + + result := cm.filterProtectedPeers(peers) + protected := len(peers) - len(result) + if protected != 1 { + t.Fatalf("expected 1 protected peer (overlap), got %d", protected) + } +} + +func TestFilterProtectedAllProtected(t *testing.T) { + // Only 2 droppable peers, both are top by different categories. + cm := &dropper{maxDialPeers: 20, maxInboundPeers: 30} + + peers := makePeers(2) + stats := make(map[string]PeerInclusionStats) + stats[peers[0].ID().String()] = PeerInclusionStats{Finalized: 100} + stats[peers[1].ID().String()] = PeerInclusionStats{RecentIncluded: 5.0} + + cm.peerStatsFunc = func() map[string]PeerInclusionStats { return stats } + + result := cm.filterProtectedPeers(peers) + if len(result) != 0 { + t.Fatalf("expected all peers protected, got %d droppable", len(result)) + } +} diff --git a/eth/txtracker/tracker.go b/eth/txtracker/tracker.go index aa962cdf32..5a18402645 100644 --- a/eth/txtracker/tracker.go +++ b/eth/txtracker/tracker.go @@ -156,6 +156,12 @@ func (t *Tracker) handleChainHead(ev core.ChainHeadEvent) { blockIncl[peer]++ } } + // Ensure peers with inclusions in this block have entries. + for peer := range blockIncl { + if t.peers[peer] == nil { + t.peers[peer] = &peerStats{} + } + } // Update EMA for all tracked peers (decay inactive ones). for peer, ps := range t.peers { ps.recentIncluded = (1-emaAlpha)*ps.recentIncluded + emaAlpha*float64(blockIncl[peer]) diff --git a/eth/txtracker/tracker_test.go b/eth/txtracker/tracker_test.go new file mode 100644 index 0000000000..dc346f5255 --- /dev/null +++ b/eth/txtracker/tracker_test.go @@ -0,0 +1,262 @@ +package txtracker + +import ( + "math/big" + "sync" + "testing" + "time" + + "github.com/ethereum/go-ethereum/core" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/event" + "github.com/ethereum/go-ethereum/trie" +) + +// mockChain implements the Chain interface for testing. +type mockChain struct { + mu sync.Mutex + headFeed event.Feed + blocks map[uint64]*types.Block + finalNum uint64 +} + +func newMockChain() *mockChain { + return &mockChain{blocks: make(map[uint64]*types.Block)} +} + +func (c *mockChain) SubscribeChainHeadEvent(ch chan<- core.ChainHeadEvent) event.Subscription { + return c.headFeed.Subscribe(ch) +} + +func (c *mockChain) GetBlockByNumber(number uint64) *types.Block { + c.mu.Lock() + defer c.mu.Unlock() + return c.blocks[number] +} + +func (c *mockChain) CurrentFinalBlock() *types.Header { + c.mu.Lock() + defer c.mu.Unlock() + if c.finalNum == 0 { + return nil + } + return &types.Header{Number: new(big.Int).SetUint64(c.finalNum)} +} + +func (c *mockChain) addBlock(num uint64, txs []*types.Transaction) { + c.mu.Lock() + defer c.mu.Unlock() + header := &types.Header{Number: new(big.Int).SetUint64(num)} + c.blocks[num] = types.NewBlock(header, &types.Body{Transactions: txs}, nil, trie.NewListHasher()) +} + +func (c *mockChain) setFinalBlock(num uint64) { + c.mu.Lock() + defer c.mu.Unlock() + c.finalNum = num +} + +func (c *mockChain) sendHead(num uint64) { + c.headFeed.Send(core.ChainHeadEvent{ + Header: &types.Header{Number: new(big.Int).SetUint64(num)}, + }) +} + +func makeTx(nonce uint64) *types.Transaction { + return types.NewTx(&types.LegacyTx{Nonce: nonce, GasPrice: big.NewInt(1), Gas: 21000}) +} + +// waitForHead gives the tracker time to process a chain head event. +func waitForHead() { + time.Sleep(50 * time.Millisecond) +} + +func TestNotifyReceived(t *testing.T) { + tr := New() + chain := newMockChain() + tr.Start(chain) + defer tr.Stop() + + txs := []*types.Transaction{makeTx(1), makeTx(2), makeTx(3)} + tr.NotifyReceived("peerA", txs) + + // No chain events yet — stats should be empty. + stats := tr.GetAllPeerStats() + if len(stats) != 0 { + t.Fatalf("expected empty stats before any chain events, got %d peers", len(stats)) + } +} + +func TestInclusionEMA(t *testing.T) { + tr := New() + chain := newMockChain() + tr.Start(chain) + defer tr.Stop() + + tx := makeTx(1) + tr.NotifyReceived("peerA", []*types.Transaction{tx}) + + // Block 1 includes peerA's tx. + chain.addBlock(1, []*types.Transaction{tx}) + chain.sendHead(1) + waitForHead() + + stats := tr.GetAllPeerStats() + if stats["peerA"].RecentIncluded <= 0 { + t.Fatalf("expected RecentIncluded > 0 after inclusion, got %f", stats["peerA"].RecentIncluded) + } + ema1 := stats["peerA"].RecentIncluded + + // Block 2 has no txs from peerA — EMA should decay. + chain.addBlock(2, nil) + chain.sendHead(2) + waitForHead() + + stats = tr.GetAllPeerStats() + if stats["peerA"].RecentIncluded >= ema1 { + t.Fatalf("expected EMA to decay, got %f >= %f", stats["peerA"].RecentIncluded, ema1) + } +} + +func TestFinalization(t *testing.T) { + tr := New() + chain := newMockChain() + tr.Start(chain) + defer tr.Stop() + + tx := makeTx(1) + tr.NotifyReceived("peerA", []*types.Transaction{tx}) + + // Include in block 1. + chain.addBlock(1, []*types.Transaction{tx}) + chain.sendHead(1) + waitForHead() + + // Not finalized yet. + stats := tr.GetAllPeerStats() + if stats["peerA"].Finalized != 0 { + t.Fatalf("expected Finalized=0 before finalization, got %d", stats["peerA"].Finalized) + } + + // Finalize block 1, then send head 2 to trigger checkFinalization. + chain.setFinalBlock(1) + chain.addBlock(2, nil) + chain.sendHead(2) + waitForHead() + + stats = tr.GetAllPeerStats() + if stats["peerA"].Finalized != 1 { + t.Fatalf("expected Finalized=1 after finalization, got %d", stats["peerA"].Finalized) + } +} + +func TestMultiplePeers(t *testing.T) { + tr := New() + chain := newMockChain() + tr.Start(chain) + defer tr.Stop() + + tx1 := makeTx(1) + tx2 := makeTx(2) + tr.NotifyReceived("peerA", []*types.Transaction{tx1}) + tr.NotifyReceived("peerB", []*types.Transaction{tx2}) + + // Both included in block 1. + chain.addBlock(1, []*types.Transaction{tx1, tx2}) + chain.sendHead(1) + waitForHead() + + // Finalize. + chain.setFinalBlock(1) + chain.addBlock(2, nil) + chain.sendHead(2) + waitForHead() + + stats := tr.GetAllPeerStats() + if stats["peerA"].Finalized != 1 { + t.Fatalf("peerA: expected Finalized=1, got %d", stats["peerA"].Finalized) + } + if stats["peerB"].Finalized != 1 { + t.Fatalf("peerB: expected Finalized=1, got %d", stats["peerB"].Finalized) + } +} + +func TestFirstDelivererWins(t *testing.T) { + tr := New() + chain := newMockChain() + tr.Start(chain) + defer tr.Stop() + + tx := makeTx(1) + tr.NotifyReceived("peerA", []*types.Transaction{tx}) + tr.NotifyReceived("peerB", []*types.Transaction{tx}) // duplicate, should be ignored + + chain.addBlock(1, []*types.Transaction{tx}) + chain.sendHead(1) + waitForHead() + + chain.setFinalBlock(1) + chain.addBlock(2, nil) + chain.sendHead(2) + waitForHead() + + stats := tr.GetAllPeerStats() + if stats["peerA"].Finalized != 1 { + t.Fatalf("peerA should be credited, got Finalized=%d", stats["peerA"].Finalized) + } + if stats["peerB"].Finalized != 0 { + t.Fatalf("peerB should NOT be credited, got Finalized=%d", stats["peerB"].Finalized) + } +} + +func TestNoFinalizationCredit(t *testing.T) { + tr := New() + chain := newMockChain() + tr.Start(chain) + defer tr.Stop() + + tx := makeTx(1) + tr.NotifyReceived("peerA", []*types.Transaction{tx}) + + // Include but don't finalize. + chain.addBlock(1, []*types.Transaction{tx}) + chain.sendHead(1) + waitForHead() + + // Send more heads without finalization. + chain.addBlock(2, nil) + chain.sendHead(2) + waitForHead() + + stats := tr.GetAllPeerStats() + if stats["peerA"].Finalized != 0 { + t.Fatalf("expected Finalized=0 without finalization, got %d", stats["peerA"].Finalized) + } +} + +func TestEMADecay(t *testing.T) { + tr := New() + chain := newMockChain() + tr.Start(chain) + defer tr.Stop() + + tx := makeTx(1) + tr.NotifyReceived("peerA", []*types.Transaction{tx}) + + // Include in block 1. + chain.addBlock(1, []*types.Transaction{tx}) + chain.sendHead(1) + waitForHead() + + // Send 30 empty blocks — EMA should decay close to zero. + for i := uint64(2); i <= 31; i++ { + chain.addBlock(i, nil) + chain.sendHead(i) + waitForHead() + } + + stats := tr.GetAllPeerStats() + if stats["peerA"].RecentIncluded > 0.02 { + t.Fatalf("expected RecentIncluded near zero after 30 empty blocks, got %f", stats["peerA"].RecentIncluded) + } +} From e99330b2bc92223742643284409326c3c41a5005 Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Fri, 10 Apr 2026 10:26:33 +0200 Subject: [PATCH 06/38] eth/txtracker: seed lastFinalNum at startup to prevent genesis backfill lastFinalNum started at 0, so the first checkFinalization after startup iterated from block 1 to the current finalized head (~20M blocks on mainnet) under the mutex, stalling the tracker and potentially awarding bogus credit for ancient txs whose hashes happened to match recently-received ones. Seed lastFinalNum from chain.CurrentFinalBlock() in Start() so only blocks finalized after startup are processed. --- eth/txtracker/tracker.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/eth/txtracker/tracker.go b/eth/txtracker/tracker.go index 5a18402645..7807c8352a 100644 --- a/eth/txtracker/tracker.go +++ b/eth/txtracker/tracker.go @@ -74,6 +74,10 @@ func New() *Tracker { // Start begins listening for chain head events. func (t *Tracker) Start(chain Chain) { t.chain = chain + // Seed lastFinalNum so checkFinalization doesn't backfill from genesis. + if fh := chain.CurrentFinalBlock(); fh != nil { + t.lastFinalNum = fh.Number.Uint64() + } t.headCh = make(chan core.ChainHeadEvent, 128) t.sub = chain.SubscribeChainHeadEvent(t.headCh) t.wg.Add(1) From a1a5d7332445fe4b25f4a72615628225b6de2579 Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Fri, 10 Apr 2026 10:33:36 +0200 Subject: [PATCH 07/38] eth: only record deliverers for pool-accepted transactions NotifyReceived was called before pool validation, allowing a peer to claim deliverer credit by replaying already-included txs or sending invalid packets. Rename to NotifyAccepted (takes hashes, not full txs). Call it from a new enqueueAndTrack helper in handler_eth.go that runs after Enqueue and checks pool.Has to identify accepted txs. Only accepted txs are credited to the delivering peer. --- eth/handler_eth.go | 33 +++++++++++++++++++++++++++++---- eth/txtracker/tracker.go | 11 ++++++----- eth/txtracker/tracker_test.go | 27 ++++++++++++++++++--------- 3 files changed, 53 insertions(+), 18 deletions(-) diff --git a/eth/handler_eth.go b/eth/handler_eth.go index 8974e4b8ab..82ecfa1fb6 100644 --- a/eth/handler_eth.go +++ b/eth/handler_eth.go @@ -66,28 +66,53 @@ func (h *ethHandler) Handle(peer *eth.Peer, packet eth.Packet) error { if err != nil { return fmt.Errorf("Transactions: %v", err) } - h.txTracker.NotifyReceived(peer.ID(), txs) if err := handleTransactions(peer, txs, true); err != nil { return fmt.Errorf("Transactions: %v", err) } - return h.txFetcher.Enqueue(peer.ID(), txs, false) + h.enqueueAndTrack(peer.ID(), txs, false) + return nil case *eth.PooledTransactionsPacket: txs, err := packet.List.Items() if err != nil { return fmt.Errorf("PooledTransactions: %v", err) } - h.txTracker.NotifyReceived(peer.ID(), txs) if err := handleTransactions(peer, txs, false); err != nil { return fmt.Errorf("PooledTransactions: %v", err) } - return h.txFetcher.Enqueue(peer.ID(), txs, true) + h.enqueueAndTrack(peer.ID(), txs, true) + return nil default: return fmt.Errorf("unexpected eth packet type: %T", packet) } } +// enqueueAndTrack sends transactions to the fetcher for pool submission and +// notifies the tracker for any that were accepted by the pool. +func (h *ethHandler) enqueueAndTrack(peer string, txs []*types.Transaction, direct bool) { + // Collect hashes before enqueue (Enqueue may reorder/filter the slice). + hashes := make([]common.Hash, len(txs)) + for i, tx := range txs { + hashes[i] = tx.Hash() + } + // Enqueue submits to pool via addTxs callback. After return, check + // which txs the pool now knows about (accepted, not rejected). + h.txFetcher.Enqueue(peer, txs, direct) + + // Credit the peer for txs the pool accepted. We check pool.Has + // because Enqueue doesn't return per-tx acceptance status. + var accepted []common.Hash + for _, hash := range hashes { + if h.txpool.Has(hash) { + accepted = append(accepted, hash) + } + } + if len(accepted) > 0 { + h.txTracker.NotifyAccepted(peer, accepted) + } +} + // handleTransactions marks all given transactions as known to the peer // and performs basic validations. func handleTransactions(peer *eth.Peer, list []*types.Transaction, directBroadcast bool) error { diff --git a/eth/txtracker/tracker.go b/eth/txtracker/tracker.go index 7807c8352a..beaab60977 100644 --- a/eth/txtracker/tracker.go +++ b/eth/txtracker/tracker.go @@ -1,6 +1,6 @@ // Package txtracker provides minimal per-peer transaction inclusion tracking. // -// It records which peer delivered each transaction body (via NotifyReceived) +// It records which peer delivered each accepted transaction (via NotifyAccepted) // and monitors the chain for inclusion and finalization events. When a // delivered transaction is finalized on chain, the delivering peer is // credited. A per-block exponential moving average (EMA) of inclusions @@ -91,14 +91,15 @@ func (t *Tracker) Stop() { t.wg.Wait() } -// NotifyReceived records that a peer delivered transaction bodies. +// NotifyAccepted records that a peer delivered transactions that were accepted +// by the pool. Only accepted (not rejected/duplicate) txs should be recorded +// to prevent attribution poisoning from replayed or invalid txs. // Safe to call from any goroutine. -func (t *Tracker) NotifyReceived(peer string, txs []*types.Transaction) { +func (t *Tracker) NotifyAccepted(peer string, hashes []common.Hash) { t.mu.Lock() defer t.mu.Unlock() - for _, tx := range txs { - hash := tx.Hash() + for _, hash := range hashes { if _, ok := t.txs[hash]; ok { continue // already tracked, keep first deliverer } diff --git a/eth/txtracker/tracker_test.go b/eth/txtracker/tracker_test.go index dc346f5255..35c315ac8f 100644 --- a/eth/txtracker/tracker_test.go +++ b/eth/txtracker/tracker_test.go @@ -6,6 +6,7 @@ import ( "testing" "time" + "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/event" @@ -62,6 +63,14 @@ func (c *mockChain) sendHead(num uint64) { }) } +func hashTxs(txs []*types.Transaction) []common.Hash { + hashes := make([]common.Hash, len(txs)) + for i, tx := range txs { + hashes[i] = tx.Hash() + } + return hashes +} + func makeTx(nonce uint64) *types.Transaction { return types.NewTx(&types.LegacyTx{Nonce: nonce, GasPrice: big.NewInt(1), Gas: 21000}) } @@ -78,7 +87,7 @@ func TestNotifyReceived(t *testing.T) { defer tr.Stop() txs := []*types.Transaction{makeTx(1), makeTx(2), makeTx(3)} - tr.NotifyReceived("peerA", txs) + tr.NotifyAccepted("peerA", hashTxs(txs)) // No chain events yet — stats should be empty. stats := tr.GetAllPeerStats() @@ -94,7 +103,7 @@ func TestInclusionEMA(t *testing.T) { defer tr.Stop() tx := makeTx(1) - tr.NotifyReceived("peerA", []*types.Transaction{tx}) + tr.NotifyAccepted("peerA", []common.Hash{tx.Hash()}) // Block 1 includes peerA's tx. chain.addBlock(1, []*types.Transaction{tx}) @@ -125,7 +134,7 @@ func TestFinalization(t *testing.T) { defer tr.Stop() tx := makeTx(1) - tr.NotifyReceived("peerA", []*types.Transaction{tx}) + tr.NotifyAccepted("peerA", []common.Hash{tx.Hash()}) // Include in block 1. chain.addBlock(1, []*types.Transaction{tx}) @@ -158,8 +167,8 @@ func TestMultiplePeers(t *testing.T) { tx1 := makeTx(1) tx2 := makeTx(2) - tr.NotifyReceived("peerA", []*types.Transaction{tx1}) - tr.NotifyReceived("peerB", []*types.Transaction{tx2}) + tr.NotifyAccepted("peerA", []common.Hash{tx1.Hash()}) + tr.NotifyAccepted("peerB", []common.Hash{tx2.Hash()}) // Both included in block 1. chain.addBlock(1, []*types.Transaction{tx1, tx2}) @@ -188,8 +197,8 @@ func TestFirstDelivererWins(t *testing.T) { defer tr.Stop() tx := makeTx(1) - tr.NotifyReceived("peerA", []*types.Transaction{tx}) - tr.NotifyReceived("peerB", []*types.Transaction{tx}) // duplicate, should be ignored + tr.NotifyAccepted("peerA", []common.Hash{tx.Hash()}) + tr.NotifyAccepted("peerB", []common.Hash{tx.Hash()}) // duplicate, should be ignored chain.addBlock(1, []*types.Transaction{tx}) chain.sendHead(1) @@ -216,7 +225,7 @@ func TestNoFinalizationCredit(t *testing.T) { defer tr.Stop() tx := makeTx(1) - tr.NotifyReceived("peerA", []*types.Transaction{tx}) + tr.NotifyAccepted("peerA", []common.Hash{tx.Hash()}) // Include but don't finalize. chain.addBlock(1, []*types.Transaction{tx}) @@ -241,7 +250,7 @@ func TestEMADecay(t *testing.T) { defer tr.Stop() tx := makeTx(1) - tr.NotifyReceived("peerA", []*types.Transaction{tx}) + tr.NotifyAccepted("peerA", []common.Hash{tx.Hash()}) // Include in block 1. chain.addBlock(1, []*types.Transaction{tx}) From 3785d43db4476a34ae9fb65264038ec0cdcde65f Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Fri, 10 Apr 2026 10:34:33 +0200 Subject: [PATCH 08/38] eth/txtracker: fetch head block by hash to avoid reorg-stale EMA handleChainHead fetched the block by number only. If the tracker goroutine lagged and that height was reorged before processing, the EMA was computed from the wrong canonical block. Use GetBlock(hash, number) with the header hash from the event to fetch the exact block the event refers to, not whatever is currently canonical at that height. --- eth/txtracker/tracker.go | 6 ++++-- eth/txtracker/tracker_test.go | 5 +++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/eth/txtracker/tracker.go b/eth/txtracker/tracker.go index beaab60977..8458c8253d 100644 --- a/eth/txtracker/tracker.go +++ b/eth/txtracker/tracker.go @@ -37,6 +37,7 @@ type PeerStats struct { type Chain interface { SubscribeChainHeadEvent(ch chan<- core.ChainHeadEvent) event.Subscription GetBlockByNumber(number uint64) *types.Block + GetBlock(hash common.Hash, number uint64) *types.Block CurrentFinalBlock() *types.Header } @@ -146,8 +147,9 @@ func (t *Tracker) loop() { } func (t *Tracker) handleChainHead(ev core.ChainHeadEvent) { - // Update recent-inclusion EMA from the new head block. - block := t.chain.GetBlockByNumber(ev.Header.Number.Uint64()) + // Fetch the head block by hash (not just number) to avoid using a + // reorged block if the tracker goroutine lags behind the chain. + block := t.chain.GetBlock(ev.Header.Hash(), ev.Header.Number.Uint64()) if block == nil { return } diff --git a/eth/txtracker/tracker_test.go b/eth/txtracker/tracker_test.go index 35c315ac8f..36d72def23 100644 --- a/eth/txtracker/tracker_test.go +++ b/eth/txtracker/tracker_test.go @@ -35,6 +35,11 @@ func (c *mockChain) GetBlockByNumber(number uint64) *types.Block { return c.blocks[number] } +func (c *mockChain) GetBlock(hash common.Hash, number uint64) *types.Block { + // In tests we only key by number; ignore hash. + return c.GetBlockByNumber(number) +} + func (c *mockChain) CurrentFinalBlock() *types.Header { c.mu.Lock() defer c.mu.Unlock() From 6d53acfa2273f6195cdbe7ea8f25b841dbae9c97 Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Fri, 10 Apr 2026 10:35:28 +0200 Subject: [PATCH 09/38] eth/txtracker: prune peer stats on disconnect Peer stats were never pruned, so the peers map grew with every peer ever seen. The EMA decay loop and stats copy iterated all historical peers on every block/query. Add NotifyPeerDrop(peer) that deletes the peer's stats entry. Called from handler.unregisterPeer alongside txFetcher.Drop. --- eth/handler.go | 1 + eth/txtracker/tracker.go | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/eth/handler.go b/eth/handler.go index 90d74a71bf..3b6dbec1a5 100644 --- a/eth/handler.go +++ b/eth/handler.go @@ -406,6 +406,7 @@ func (h *handler) unregisterPeer(id string) { } h.downloader.UnregisterPeer(id) h.txFetcher.Drop(id) + h.txTracker.NotifyPeerDrop(id) if err := h.peers.unregisterPeer(id); err != nil { logger.Error("Ethereum peer removal failed", "err", err) diff --git a/eth/txtracker/tracker.go b/eth/txtracker/tracker.go index 8458c8253d..28f7f27aab 100644 --- a/eth/txtracker/tracker.go +++ b/eth/txtracker/tracker.go @@ -85,6 +85,14 @@ func (t *Tracker) Start(chain Chain) { go t.loop() } +// NotifyPeerDrop removes a disconnected peer's stats to prevent unbounded +// growth. Safe to call from any goroutine. +func (t *Tracker) NotifyPeerDrop(peer string) { + t.mu.Lock() + defer t.mu.Unlock() + delete(t.peers, peer) +} + // Stop shuts down the tracker. func (t *Tracker) Stop() { t.sub.Unsubscribe() From 44c8a5b7f427645c518919734a35d115742ce656 Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Fri, 10 Apr 2026 10:36:59 +0200 Subject: [PATCH 10/38] eth: base protection quota on current peer count, not max capacity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit protectTopN used maxPeers (configured capacity) to compute the number of peers to protect. With small droppable sets this could protect everyone, permanently disabling churn. Use len(entries) (current droppable count in each category) instead. With 20 droppable dialed peers and 10% fraction, 2 are protected. With 3 droppable peers, 0 are protected — churn is never blocked. --- eth/dropper.go | 8 ++++---- eth/dropper_test.go | 15 ++++++++++----- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/eth/dropper.go b/eth/dropper.go index 87c72cfa0f..2eabd92fe7 100644 --- a/eth/dropper.go +++ b/eth/dropper.go @@ -218,8 +218,8 @@ func (cm *dropper) filterProtectedPeers(droppable []*p2p.Peer) []*p2p.Peer { } protectedSet := make(map[*p2p.Peer]struct{}) - protectTopN := func(entries []peerWithStats, maxPeers int, cat protectionCategory) { - n := int(float64(maxPeers) * cat.frac) + protectTopN := func(entries []peerWithStats, cat protectionCategory) { + n := int(float64(len(entries)) * cat.frac) if n == 0 || len(entries) == 0 { return } @@ -238,8 +238,8 @@ func (cm *dropper) filterProtectedPeers(droppable []*p2p.Peer) []*p2p.Peer { dialCopy := make([]peerWithStats, len(dialed)) copy(dialCopy, dialed) - protectTopN(inCopy, cm.maxInboundPeers, cat) - protectTopN(dialCopy, cm.maxDialPeers, cat) + protectTopN(inCopy, cat) + protectTopN(dialCopy, cat) } if len(protectedSet) == 0 { return droppable diff --git a/eth/dropper_test.go b/eth/dropper_test.go index 8c08893ee3..9f8b55a0b6 100644 --- a/eth/dropper_test.go +++ b/eth/dropper_test.go @@ -43,7 +43,7 @@ func TestFilterProtectedEmptyStats(t *testing.T) { } func TestFilterProtectedTopPeer(t *testing.T) { - // 20 peers, maxDialPeers=20, 10% = 2 protected per category. + // 20 peers, 10% of 20 = 2 protected per category. // NewPeer creates non-inbound peers, so all go to dialed bucket. cm := &dropper{maxDialPeers: 20, maxInboundPeers: 30} @@ -108,18 +108,23 @@ func TestFilterProtectedOverlap(t *testing.T) { } func TestFilterProtectedAllProtected(t *testing.T) { - // Only 2 droppable peers, both are top by different categories. + // 10 peers: 10% = 1 per category. Give all 10 peers high scores in + // one of the two categories so the union covers everyone. cm := &dropper{maxDialPeers: 20, maxInboundPeers: 30} - peers := makePeers(2) + peers := makePeers(10) stats := make(map[string]PeerInclusionStats) + // Peer 0 has the highest Finalized → protected by total-finalized. stats[peers[0].ID().String()] = PeerInclusionStats{Finalized: 100} + // Peer 1 has the highest RecentIncluded → protected by recent-included. stats[peers[1].ID().String()] = PeerInclusionStats{RecentIncluded: 5.0} cm.peerStatsFunc = func() map[string]PeerInclusionStats { return stats } result := cm.filterProtectedPeers(peers) - if len(result) != 0 { - t.Fatalf("expected all peers protected, got %d droppable", len(result)) + // 10% of 10 = 1 per category, 2 categories = 2 protected. + // 10 - 2 = 8 droppable. + if len(result) != 8 { + t.Fatalf("expected 8 droppable peers, got %d", len(result)) } } From f66323d768171e15705bae14baf48af9b4690919 Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Fri, 10 Apr 2026 10:37:42 +0200 Subject: [PATCH 11/38] eth: add LGPL copyright headers to new files Add the standard go-ethereum LGPL header to tracker.go, tracker_test.go, and dropper_test.go. --- eth/dropper_test.go | 16 ++++++++++++++++ eth/txtracker/tracker.go | 16 ++++++++++++++++ eth/txtracker/tracker_test.go | 16 ++++++++++++++++ 3 files changed, 48 insertions(+) diff --git a/eth/dropper_test.go b/eth/dropper_test.go index 9f8b55a0b6..484b0cd23b 100644 --- a/eth/dropper_test.go +++ b/eth/dropper_test.go @@ -1,3 +1,19 @@ +// Copyright 2026 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + package eth import ( diff --git a/eth/txtracker/tracker.go b/eth/txtracker/tracker.go index 28f7f27aab..f2afae4dd4 100644 --- a/eth/txtracker/tracker.go +++ b/eth/txtracker/tracker.go @@ -1,3 +1,19 @@ +// Copyright 2026 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + // Package txtracker provides minimal per-peer transaction inclusion tracking. // // It records which peer delivered each accepted transaction (via NotifyAccepted) diff --git a/eth/txtracker/tracker_test.go b/eth/txtracker/tracker_test.go index 36d72def23..5371c7eb2a 100644 --- a/eth/txtracker/tracker_test.go +++ b/eth/txtracker/tracker_test.go @@ -1,3 +1,19 @@ +// Copyright 2026 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + package txtracker import ( From 47c603388a01341d36616e7ffe21170bb03dd7ec Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Fri, 10 Apr 2026 11:18:25 +0200 Subject: [PATCH 12/38] eth/fetcher: add onAccepted callback to fix attribution race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit enqueueAndTrack used pool.Has() after Enqueue to determine accepted txs. Under concurrent delivery of the same tx from two peers, both could see Has()==true, making attribution non-deterministic. Add an onAccepted callback to the fetcher, called from Enqueue with (peer, acceptedHashes) immediately after pool.Add returns for each batch. Attribution happens atomically inside Enqueue using the per-tx error from addTxs (nil = accepted), before another goroutine can race. Remove the enqueueAndTrack helper from handler_eth.go — the fetcher now handles notification directly. --- eth/fetcher/tx_fetcher.go | 23 ++++++++++++++++------- eth/fetcher/tx_fetcher_test.go | 3 ++- eth/handler.go | 2 +- eth/handler_eth.go | 31 ++----------------------------- 4 files changed, 21 insertions(+), 38 deletions(-) diff --git a/eth/fetcher/tx_fetcher.go b/eth/fetcher/tx_fetcher.go index 5817dfbcf5..6e916747da 100644 --- a/eth/fetcher/tx_fetcher.go +++ b/eth/fetcher/tx_fetcher.go @@ -181,9 +181,10 @@ type TxFetcher struct { // Callbacks validateMeta func(common.Hash, byte) error // Validate a tx metadata based on the local txpool - addTxs func([]*types.Transaction) []error // Insert a batch of transactions into local txpool - fetchTxs func(string, []common.Hash) error // Retrieves a set of txs from a remote peer - dropPeer func(string) // Drops a peer in case of announcement violation + addTxs func([]*types.Transaction) []error // Insert a batch of transactions into local txpool + fetchTxs func(string, []common.Hash) error // Retrieves a set of txs from a remote peer + dropPeer func(string) // Drops a peer in case of announcement violation + onAccepted func(peer string, hashes []common.Hash) // Optional: notified with accepted tx hashes per peer step chan struct{} // Notification channel when the fetcher loop iterates clock mclock.Clock // Monotonic clock or simulated clock for tests @@ -194,15 +195,15 @@ type TxFetcher struct { // NewTxFetcher creates a transaction fetcher to retrieve transaction // based on hash announcements. // Chain can be nil to disable on-chain checks. -func NewTxFetcher(chain *core.BlockChain, validateMeta func(common.Hash, byte) error, addTxs func([]*types.Transaction) []error, fetchTxs func(string, []common.Hash) error, dropPeer func(string)) *TxFetcher { - return NewTxFetcherForTests(chain, validateMeta, addTxs, fetchTxs, dropPeer, mclock.System{}, time.Now, nil) +func NewTxFetcher(chain *core.BlockChain, validateMeta func(common.Hash, byte) error, addTxs func([]*types.Transaction) []error, fetchTxs func(string, []common.Hash) error, dropPeer func(string), onAccepted func(string, []common.Hash)) *TxFetcher { + return NewTxFetcherForTests(chain, validateMeta, addTxs, fetchTxs, dropPeer, onAccepted, mclock.System{}, time.Now, nil) } // NewTxFetcherForTests is a testing method to mock out the realtime clock with // a simulated version and the internal randomness with a deterministic one. // Chain can be nil to disable on-chain checks. func NewTxFetcherForTests( - chain *core.BlockChain, validateMeta func(common.Hash, byte) error, addTxs func([]*types.Transaction) []error, fetchTxs func(string, []common.Hash) error, dropPeer func(string), + chain *core.BlockChain, validateMeta func(common.Hash, byte) error, addTxs func([]*types.Transaction) []error, fetchTxs func(string, []common.Hash) error, dropPeer func(string), onAccepted func(string, []common.Hash), clock mclock.Clock, realTime func() time.Time, rand *mrand.Rand) *TxFetcher { return &TxFetcher{ notify: make(chan *txAnnounce), @@ -224,6 +225,7 @@ func NewTxFetcherForTests( addTxs: addTxs, fetchTxs: fetchTxs, dropPeer: dropPeer, + onAccepted: onAccepted, clock: clock, realTime: realTime, rand: rand, @@ -344,6 +346,8 @@ func (f *TxFetcher) Enqueue(peer string, txs []*types.Transaction, direct bool) ) batch := txs[i:end] + var accepted []common.Hash + for j, err := range f.addTxs(batch) { // Track the transaction hash if the price is too low for us. // Avoid re-request this transaction when we receive another @@ -353,7 +357,8 @@ func (f *TxFetcher) Enqueue(peer string, txs []*types.Transaction, direct bool) } // Track a few interesting failure types switch { - case err == nil: // Noop, but need to handle to not count these + case err == nil: + accepted = append(accepted, batch[j].Hash()) case errors.Is(err, txpool.ErrAlreadyKnown): duplicate++ @@ -385,6 +390,10 @@ func (f *TxFetcher) Enqueue(peer string, txs []*types.Transaction, direct bool) underpricedMeter.Mark(underpriced) otherRejectMeter.Mark(otherreject) + // Notify the tracker which txs from this peer were accepted. + if f.onAccepted != nil && len(accepted) > 0 { + f.onAccepted(peer, accepted) + } // If 'other reject' is >25% of the deliveries in any batch, sleep a bit. if otherreject > int64((len(batch)+3)/4) { log.Debug("Peer delivering stale or invalid transactions", "peer", peer, "rejected", otherreject) diff --git a/eth/fetcher/tx_fetcher_test.go b/eth/fetcher/tx_fetcher_test.go index 6c2719631e..3fe11fda21 100644 --- a/eth/fetcher/tx_fetcher_test.go +++ b/eth/fetcher/tx_fetcher_test.go @@ -97,7 +97,7 @@ func newTestTxFetcher() *TxFetcher { return make([]error, len(txs)) }, func(string, []common.Hash) error { return nil }, - nil, + nil, nil, ) } @@ -2203,6 +2203,7 @@ func TestTransactionForgotten(t *testing.T) { }, func(string, []common.Hash) error { return nil }, func(string) {}, + nil, mockClock, mockTime, rand.New(rand.NewSource(0)), // Use fixed seed for deterministic behavior diff --git a/eth/handler.go b/eth/handler.go index 3b6dbec1a5..69469fddd0 100644 --- a/eth/handler.go +++ b/eth/handler.go @@ -190,8 +190,8 @@ func newHandler(config *handlerConfig) (*handler, error) { } return nil } - h.txFetcher = fetcher.NewTxFetcher(h.chain, validateMeta, addTxs, fetchTx, h.removePeer) h.txTracker = txtracker.New() + h.txFetcher = fetcher.NewTxFetcher(h.chain, validateMeta, addTxs, fetchTx, h.removePeer, h.txTracker.NotifyAccepted) return h, nil } diff --git a/eth/handler_eth.go b/eth/handler_eth.go index 82ecfa1fb6..8704a86af4 100644 --- a/eth/handler_eth.go +++ b/eth/handler_eth.go @@ -69,8 +69,7 @@ func (h *ethHandler) Handle(peer *eth.Peer, packet eth.Packet) error { if err := handleTransactions(peer, txs, true); err != nil { return fmt.Errorf("Transactions: %v", err) } - h.enqueueAndTrack(peer.ID(), txs, false) - return nil + return h.txFetcher.Enqueue(peer.ID(), txs, false) case *eth.PooledTransactionsPacket: txs, err := packet.List.Items() @@ -80,39 +79,13 @@ func (h *ethHandler) Handle(peer *eth.Peer, packet eth.Packet) error { if err := handleTransactions(peer, txs, false); err != nil { return fmt.Errorf("PooledTransactions: %v", err) } - h.enqueueAndTrack(peer.ID(), txs, true) - return nil + return h.txFetcher.Enqueue(peer.ID(), txs, true) default: return fmt.Errorf("unexpected eth packet type: %T", packet) } } -// enqueueAndTrack sends transactions to the fetcher for pool submission and -// notifies the tracker for any that were accepted by the pool. -func (h *ethHandler) enqueueAndTrack(peer string, txs []*types.Transaction, direct bool) { - // Collect hashes before enqueue (Enqueue may reorder/filter the slice). - hashes := make([]common.Hash, len(txs)) - for i, tx := range txs { - hashes[i] = tx.Hash() - } - // Enqueue submits to pool via addTxs callback. After return, check - // which txs the pool now knows about (accepted, not rejected). - h.txFetcher.Enqueue(peer, txs, direct) - - // Credit the peer for txs the pool accepted. We check pool.Has - // because Enqueue doesn't return per-tx acceptance status. - var accepted []common.Hash - for _, hash := range hashes { - if h.txpool.Has(hash) { - accepted = append(accepted, hash) - } - } - if len(accepted) > 0 { - h.txTracker.NotifyAccepted(peer, accepted) - } -} - // handleTransactions marks all given transactions as known to the peer // and performs basic validations. func handleTransactions(peer *eth.Peer, list []*types.Transaction, directBroadcast bool) error { From db611822dbe215c353b25cf564c8e91afba9e127 Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Fri, 10 Apr 2026 11:24:03 +0200 Subject: [PATCH 13/38] eth: rename droppedProtected metric to dropSkipped --- eth/dropper.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/eth/dropper.go b/eth/dropper.go index 2eabd92fe7..d79edc9795 100644 --- a/eth/dropper.go +++ b/eth/dropper.go @@ -52,9 +52,9 @@ var ( droppedInbound = metrics.NewRegisteredMeter("eth/dropper/inbound", nil) // droppedOutbound is the number of outbound peers dropped droppedOutbound = metrics.NewRegisteredMeter("eth/dropper/outbound", nil) - // droppedProtected counts times a drop was skipped because all + // dropSkipped counts times a drop was skipped because all // droppable candidates were protected by inclusion stats. - droppedProtected = metrics.NewRegisteredMeter("eth/dropper/protected", nil) + dropSkipped = metrics.NewRegisteredMeter("eth/dropper/protected", nil) ) // PeerInclusionStats holds the per-peer inclusion data needed by the dropper @@ -177,7 +177,7 @@ func (cm *dropper) dropRandomPeer() bool { if cm.peerStatsFunc != nil { droppable = cm.filterProtectedPeers(droppable) if len(droppable) == 0 { - droppedProtected.Mark(1) + dropSkipped.Mark(1) return false } } From 1c518be79f2207bc54d93eedb3c5305b9a191a48 Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Fri, 10 Apr 2026 11:50:11 +0200 Subject: [PATCH 14/38] =?UTF-8?q?eth:=20simplify=20peer=20protection=20?= =?UTF-8?q?=E2=80=94=20compute=20protected=20set=20upfront?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compute the protected peer set once in dropRandomPeer via protectedPeers(), then include protection as a condition in selectDoNotDrop alongside trusted/static/recent checks. This eliminates the separate filterProtectedPeers post-pass and the awkward "all protected → skip" branch. Rename filterProtectedPeers to protectedPeers, returning map[*p2p.Peer]bool instead of filtering a slice. The map is checked directly in selectDoNotDrop via protected[p]. --- eth/dropper.go | 57 ++++++++++++------------------ eth/dropper_test.go | 84 +++++++++++++++++---------------------------- 2 files changed, 53 insertions(+), 88 deletions(-) diff --git a/eth/dropper.go b/eth/dropper.go index d79edc9795..5b49a98185 100644 --- a/eth/dropper.go +++ b/eth/dropper.go @@ -159,27 +159,23 @@ func (cm *dropper) dropRandomPeer() bool { } numDialed := len(peers) - numInbound + // Compute the set of inclusion-protected peers before filtering. + protected := cm.protectedPeers(peers) + selectDoNotDrop := func(p *p2p.Peer) bool { - // Avoid dropping trusted and static peers, or recent peers. - // Only drop peers if their respective category (dialed/inbound) - // is close to limit capacity. return p.Trusted() || p.StaticDialed() || p.Lifetime() < mclock.AbsTime(doNotDropBefore) || (p.DynDialed() && cm.maxDialPeers-numDialed > peerDropThreshold) || - (p.Inbound() && cm.maxInboundPeers-numInbound > peerDropThreshold) + (p.Inbound() && cm.maxInboundPeers-numInbound > peerDropThreshold) || + protected[p] } droppable := slices.DeleteFunc(peers, selectDoNotDrop) if len(droppable) == 0 { - return false - } - // Protect peers with the highest inclusion stats. - if cm.peerStatsFunc != nil { - droppable = cm.filterProtectedPeers(droppable) - if len(droppable) == 0 { + if len(protected) > 0 { dropSkipped.Mark(1) - return false } + return false } p := droppable[mrand.Intn(len(droppable))] log.Debug("Dropping random peer", "inbound", p.Inbound(), @@ -193,34 +189,35 @@ func (cm *dropper) dropRandomPeer() bool { return true } -// filterProtectedPeers removes peers from the droppable list that are -// protected by any of the protection categories. Each category independently -// selects the top-N peers per inbound/dialed pool by score; the union of all -// selections is protected. -func (cm *dropper) filterProtectedPeers(droppable []*p2p.Peer) []*p2p.Peer { +// protectedPeers computes the set of peers that should not be dropped based +// on inclusion stats. Each protection category independently selects its +// top-N peers per inbound/dialed pool; the union is returned. +func (cm *dropper) protectedPeers(peers []*p2p.Peer) map[*p2p.Peer]bool { + if cm.peerStatsFunc == nil { + return nil + } stats := cm.peerStatsFunc() if len(stats) == 0 { - return droppable + return nil } type peerWithStats struct { peer *p2p.Peer s PeerInclusionStats } var inbound, dialed []peerWithStats - for _, p := range droppable { - id := p.ID().String() - entry := peerWithStats{p, stats[id]} + for _, p := range peers { + entry := peerWithStats{p, stats[p.ID().String()]} if p.Inbound() { inbound = append(inbound, entry) } else { dialed = append(dialed, entry) } } - protectedSet := make(map[*p2p.Peer]struct{}) + result := make(map[*p2p.Peer]bool) protectTopN := func(entries []peerWithStats, cat protectionCategory) { n := int(float64(len(entries)) * cat.frac) - if n == 0 || len(entries) == 0 { + if n == 0 { return } sort.Slice(entries, func(i, j int) bool { @@ -228,7 +225,7 @@ func (cm *dropper) filterProtectedPeers(droppable []*p2p.Peer) []*p2p.Peer { }) for i := 0; i < n && i < len(entries); i++ { if cat.score(entries[i].s) > 0 { - protectedSet[entries[i].peer] = struct{}{} + result[entries[i].peer] = true } } } @@ -237,21 +234,11 @@ func (cm *dropper) filterProtectedPeers(droppable []*p2p.Peer) []*p2p.Peer { copy(inCopy, inbound) dialCopy := make([]peerWithStats, len(dialed)) copy(dialCopy, dialed) - protectTopN(inCopy, cat) protectTopN(dialCopy, cat) } - if len(protectedSet) == 0 { - return droppable - } - log.Debug("Protecting high-value peers from drop", - "protected", len(protectedSet), "droppable", len(droppable)) - - result := make([]*p2p.Peer, 0, len(droppable)) - for _, p := range droppable { - if _, ok := protectedSet[p]; !ok { - result = append(result, p) - } + if len(result) > 0 { + log.Debug("Protecting high-value peers from drop", "protected", len(result)) } return result } diff --git a/eth/dropper_test.go b/eth/dropper_test.go index 484b0cd23b..ba1eb66faf 100644 --- a/eth/dropper_test.go +++ b/eth/dropper_test.go @@ -33,114 +33,92 @@ func makePeers(n int) []*p2p.Peer { return peers } -func TestFilterProtectedNoStats(t *testing.T) { - // When the stats func returns nil/empty, all peers remain droppable. +func TestProtectedPeersNoStats(t *testing.T) { cm := &dropper{maxDialPeers: 20, maxInboundPeers: 30} cm.peerStatsFunc = func() map[string]PeerInclusionStats { return nil } peers := makePeers(10) - result := cm.filterProtectedPeers(peers) - if len(result) != len(peers) { - t.Fatalf("expected all peers droppable with nil stats, got %d/%d", len(result), len(peers)) + protected := cm.protectedPeers(peers) + if len(protected) != 0 { + t.Fatalf("expected no protected peers with nil stats, got %d", len(protected)) } } -func TestFilterProtectedEmptyStats(t *testing.T) { +func TestProtectedPeersEmptyStats(t *testing.T) { cm := &dropper{maxDialPeers: 20, maxInboundPeers: 30} cm.peerStatsFunc = func() map[string]PeerInclusionStats { return map[string]PeerInclusionStats{} } peers := makePeers(10) - result := cm.filterProtectedPeers(peers) - if len(result) != len(peers) { - t.Fatalf("expected all peers droppable with empty stats, got %d/%d", len(result), len(peers)) + protected := cm.protectedPeers(peers) + if len(protected) != 0 { + t.Fatalf("expected no protected peers with empty stats, got %d", len(protected)) } } -func TestFilterProtectedTopPeer(t *testing.T) { +func TestProtectedPeersTopPeer(t *testing.T) { // 20 peers, 10% of 20 = 2 protected per category. - // NewPeer creates non-inbound peers, so all go to dialed bucket. cm := &dropper{maxDialPeers: 20, maxInboundPeers: 30} peers := makePeers(20) stats := make(map[string]PeerInclusionStats) - // Peer 0: top by Finalized stats[peers[0].ID().String()] = PeerInclusionStats{Finalized: 100} - // Peer 1: top by RecentIncluded stats[peers[1].ID().String()] = PeerInclusionStats{RecentIncluded: 5.0} cm.peerStatsFunc = func() map[string]PeerInclusionStats { return stats } - result := cm.filterProtectedPeers(peers) - // 2 categories × 2 protected each = up to 4, but peers 0 and 1 are - // different so both should be removed. Other peers have zero scores. - protected := len(peers) - len(result) - if protected != 2 { - t.Fatalf("expected 2 protected peers, got %d", protected) + protected := cm.protectedPeers(peers) + if len(protected) != 2 { + t.Fatalf("expected 2 protected peers, got %d", len(protected)) } - // Verify peers 0 and 1 are not in result. - for _, p := range result { - id := p.ID().String() - if id == peers[0].ID().String() || id == peers[1].ID().String() { - t.Fatalf("peer %s should be protected", id) - } + if !protected[peers[0]] { + t.Fatal("peer 0 should be protected (top Finalized)") + } + if !protected[peers[1]] { + t.Fatal("peer 1 should be protected (top RecentIncluded)") } } -func TestFilterProtectedZeroScore(t *testing.T) { +func TestProtectedPeersZeroScore(t *testing.T) { cm := &dropper{maxDialPeers: 20, maxInboundPeers: 30} peers := makePeers(10) stats := make(map[string]PeerInclusionStats) - // All peers have zero stats. for _, p := range peers { stats[p.ID().String()] = PeerInclusionStats{} } cm.peerStatsFunc = func() map[string]PeerInclusionStats { return stats } - result := cm.filterProtectedPeers(peers) - if len(result) != len(peers) { - t.Fatalf("expected no protection with zero scores, got %d protected", len(peers)-len(result)) + protected := cm.protectedPeers(peers) + if len(protected) != 0 { + t.Fatalf("expected no protection with zero scores, got %d", len(protected)) } } -func TestFilterProtectedOverlap(t *testing.T) { - // One peer is top in both categories — should only be removed once. +func TestProtectedPeersOverlap(t *testing.T) { + // One peer is top in both categories — counted once. cm := &dropper{maxDialPeers: 20, maxInboundPeers: 30} peers := makePeers(20) stats := make(map[string]PeerInclusionStats) - // Peer 0 is top in both. stats[peers[0].ID().String()] = PeerInclusionStats{Finalized: 100, RecentIncluded: 5.0} cm.peerStatsFunc = func() map[string]PeerInclusionStats { return stats } - result := cm.filterProtectedPeers(peers) - protected := len(peers) - len(result) - if protected != 1 { - t.Fatalf("expected 1 protected peer (overlap), got %d", protected) + protected := cm.protectedPeers(peers) + if len(protected) != 1 { + t.Fatalf("expected 1 protected peer (overlap), got %d", len(protected)) } } -func TestFilterProtectedAllProtected(t *testing.T) { - // 10 peers: 10% = 1 per category. Give all 10 peers high scores in - // one of the two categories so the union covers everyone. +func TestProtectedPeersNilFunc(t *testing.T) { cm := &dropper{maxDialPeers: 20, maxInboundPeers: 30} + // peerStatsFunc is nil (default). peers := makePeers(10) - stats := make(map[string]PeerInclusionStats) - // Peer 0 has the highest Finalized → protected by total-finalized. - stats[peers[0].ID().String()] = PeerInclusionStats{Finalized: 100} - // Peer 1 has the highest RecentIncluded → protected by recent-included. - stats[peers[1].ID().String()] = PeerInclusionStats{RecentIncluded: 5.0} - - cm.peerStatsFunc = func() map[string]PeerInclusionStats { return stats } - - result := cm.filterProtectedPeers(peers) - // 10% of 10 = 1 per category, 2 categories = 2 protected. - // 10 - 2 = 8 droppable. - if len(result) != 8 { - t.Fatalf("expected 8 droppable peers, got %d", len(result)) + protected := cm.protectedPeers(peers) + if protected != nil { + t.Fatalf("expected nil with nil stats func, got %v", protected) } } From aa5fa692e2261bfa362987aca423576a34a43008 Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Fri, 10 Apr 2026 11:59:48 +0200 Subject: [PATCH 15/38] eth: simplify protectedPeers with generic topN helper Replace peerWithStats wrapper, manual slice copying, and protectTopN closure with a generic topN[T] function that sorts by score and returns top elements. protectedPeers now works directly with []*p2p.Peer slices, building per-category score functions that close over the stats map. --- eth/dropper.go | 62 +++++++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/eth/dropper.go b/eth/dropper.go index 5b49a98185..6a8554eb83 100644 --- a/eth/dropper.go +++ b/eth/dropper.go @@ -189,6 +189,25 @@ func (cm *dropper) dropRandomPeer() bool { return true } +// topN returns the top n elements from items by score (descending). +// Only elements with score > 0 are included. The input slice is not modified. +func topN[T any](items []T, n int, score func(T) float64) []T { + if n == 0 || len(items) == 0 { + return nil + } + cp := make([]T, len(items)) + copy(cp, items) + sort.Slice(cp, func(i, j int) bool { return score(cp[i]) > score(cp[j]) }) + + var result []T + for i := 0; i < n && i < len(cp); i++ { + if score(cp[i]) > 0 { + result = append(result, cp[i]) + } + } + return result +} + // protectedPeers computes the set of peers that should not be dropped based // on inclusion stats. Each protection category independently selects its // top-N peers per inbound/dialed pool; the union is returned. @@ -200,42 +219,27 @@ func (cm *dropper) protectedPeers(peers []*p2p.Peer) map[*p2p.Peer]bool { if len(stats) == 0 { return nil } - type peerWithStats struct { - peer *p2p.Peer - s PeerInclusionStats - } - var inbound, dialed []peerWithStats + // Split peers by direction. + var inbound, dialed []*p2p.Peer for _, p := range peers { - entry := peerWithStats{p, stats[p.ID().String()]} if p.Inbound() { - inbound = append(inbound, entry) + inbound = append(inbound, p) } else { - dialed = append(dialed, entry) + dialed = append(dialed, p) } } result := make(map[*p2p.Peer]bool) - - protectTopN := func(entries []peerWithStats, cat protectionCategory) { - n := int(float64(len(entries)) * cat.frac) - if n == 0 { - return - } - sort.Slice(entries, func(i, j int) bool { - return cat.score(entries[i].s) > cat.score(entries[j].s) - }) - for i := 0; i < n && i < len(entries); i++ { - if cat.score(entries[i].s) > 0 { - result[entries[i].peer] = true - } - } - } for _, cat := range protectionCategories { - inCopy := make([]peerWithStats, len(inbound)) - copy(inCopy, inbound) - dialCopy := make([]peerWithStats, len(dialed)) - copy(dialCopy, dialed) - protectTopN(inCopy, cat) - protectTopN(dialCopy, cat) + // Build a score function that looks up the peer's stats. + score := func(p *p2p.Peer) float64 { + return cat.score(stats[p.ID().String()]) + } + for _, p := range topN(inbound, int(float64(len(inbound))*cat.frac), score) { + result[p] = true + } + for _, p := range topN(dialed, int(float64(len(dialed))*cat.frac), score) { + result[p] = true + } } if len(result) > 0 { log.Debug("Protecting high-value peers from drop", "protected", len(result)) From ecfdaa69c6abda07b8b721fcdcdd184cd176201d Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Fri, 10 Apr 2026 12:07:19 +0200 Subject: [PATCH 16/38] eth: replace topN helper with stdlib slices.SortedFunc + DeleteFunc Remove the custom topN generic function. Use slices.SortedFunc (creates a sorted copy from an iterator) + slices.DeleteFunc (filters score <= 0) from the standard library. No custom generics needed. --- eth/dropper.go | 46 +++++++++++++++++++--------------------------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/eth/dropper.go b/eth/dropper.go index 6a8554eb83..2c73396065 100644 --- a/eth/dropper.go +++ b/eth/dropper.go @@ -17,9 +17,9 @@ package eth import ( + "cmp" mrand "math/rand" "slices" - "sort" "sync" "time" @@ -189,25 +189,6 @@ func (cm *dropper) dropRandomPeer() bool { return true } -// topN returns the top n elements from items by score (descending). -// Only elements with score > 0 are included. The input slice is not modified. -func topN[T any](items []T, n int, score func(T) float64) []T { - if n == 0 || len(items) == 0 { - return nil - } - cp := make([]T, len(items)) - copy(cp, items) - sort.Slice(cp, func(i, j int) bool { return score(cp[i]) > score(cp[j]) }) - - var result []T - for i := 0; i < n && i < len(cp); i++ { - if score(cp[i]) > 0 { - result = append(result, cp[i]) - } - } - return result -} - // protectedPeers computes the set of peers that should not be dropped based // on inclusion stats. Each protection category independently selects its // top-N peers per inbound/dialed pool; the union is returned. @@ -228,18 +209,29 @@ func (cm *dropper) protectedPeers(peers []*p2p.Peer) map[*p2p.Peer]bool { dialed = append(dialed, p) } } + // protectPool selects the top-frac peers from pool by score and adds them to result. result := make(map[*p2p.Peer]bool) + protectPool := func(pool []*p2p.Peer, score func(*p2p.Peer) float64, frac float64) { + n := int(float64(len(pool)) * frac) + if n == 0 { + return + } + sorted := slices.SortedFunc(slices.Values(pool), func(a, b *p2p.Peer) int { + return cmp.Compare(score(b), score(a)) // descending + }) + top := slices.DeleteFunc(sorted[:min(n, len(sorted))], func(p *p2p.Peer) bool { + return score(p) <= 0 + }) + for _, p := range top { + result[p] = true + } + } for _, cat := range protectionCategories { - // Build a score function that looks up the peer's stats. score := func(p *p2p.Peer) float64 { return cat.score(stats[p.ID().String()]) } - for _, p := range topN(inbound, int(float64(len(inbound))*cat.frac), score) { - result[p] = true - } - for _, p := range topN(dialed, int(float64(len(dialed))*cat.frac), score) { - result[p] = true - } + protectPool(inbound, score, cat.frac) + protectPool(dialed, score, cat.frac) } if len(result) > 0 { log.Debug("Protecting high-value peers from drop", "protected", len(result)) From 803ac3c641c1b7453edecd09de4cd0029e9479ab Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Fri, 10 Apr 2026 12:52:17 +0200 Subject: [PATCH 17/38] eth: improve dropper description Signed-off-by: Csaba Kiraly --- eth/dropper.go | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/eth/dropper.go b/eth/dropper.go index 2c73396065..eae5344142 100644 --- a/eth/dropper.go +++ b/eth/dropper.go @@ -85,17 +85,22 @@ var protectionCategories = []protectionCategory{ // dropper monitors the state of the peer pool and introduces churn by // periodically disconnecting a random peer to make room for new connections. +// The main goal is to allow new peers to join the network and to facilitate +// continuous topology adaptation. // // Behavior: // - During sync the Downloader handles peer connections, so dropper is disabled. // - When not syncing and a peer category (inbound or dialed) is close to its // limit, a random peer from that category is disconnected every 3–7 minutes. -// - Trusted, static, and recently connected peers are never dropped. -// - Peers that contribute the most on-chain transaction inclusions are -// protected from dropping. Two scoring categories are used (total finalized -// inclusions and recent inclusion EMA), each protecting the top 10% of -// inbound and dialed peers independently. The union of all protected sets -// is shielded; the drop target is chosen randomly from the remainder. +// - Trusted and static peers are never dropped. +// - Recently connected peers are also protected from dropping to give them time +// to prove their value before being at risk of disconnection. +// - Some peers are protected from dropping based on their role. This is not based +// on a unified score function, but rather on the concept of protected peer pools. +// Each pool independently selects its top fraction of peers by a specific score +// (e.g. total finalized inclusions, recent inclusion EMA); the union of all +// protected sets is shielded from random dropping, and the drop target is chosen +// randomly from the remainder. type dropper struct { maxDialPeers int // maximum number of dialed peers maxInboundPeers int // maximum number of inbound peers From 7f1720b3dc636e8556fd7fdcbcd2030ded1fe367 Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Fri, 10 Apr 2026 16:40:08 +0200 Subject: [PATCH 18/38] eth/txtracker: compact FIFO order slice to prevent memory leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit order = order[1:] reslices without releasing the backing array. After N total insertions the array retains N hashes (32 bytes each) but only the last maxTracked are live. On a long-running node processing ~100 txs/s this leaks ~275 MB/day. Compact by copying to a fresh array when capacity exceeds 2×maxTracked. --- eth/txtracker/tracker.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/eth/txtracker/tracker.go b/eth/txtracker/tracker.go index f2afae4dd4..83d94af33f 100644 --- a/eth/txtracker/tracker.go +++ b/eth/txtracker/tracker.go @@ -137,6 +137,11 @@ func (t *Tracker) NotifyAccepted(peer string, hashes []common.Hash) { t.order = t.order[1:] delete(t.txs, oldest) } + // Compact the backing array when it grows too large. Reslicing + // with order[1:] doesn't free earlier slots in the array. + if cap(t.order) > 2*maxTracked { + t.order = append([]common.Hash(nil), t.order...) + } } // GetAllPeerStats returns a snapshot of per-peer inclusion statistics. From e2b620ab44d5676ecbfbf0e13aa8c1624c260a24 Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Sat, 11 Apr 2026 16:31:44 +0200 Subject: [PATCH 19/38] eth/txtracker: prevent disconnected peers from leaking back into stats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NotifyPeerDrop deleted t.peers[peer] but left t.txs entries pointing to that peer. When those txs later finalized, checkFinalization recreated the peer entry, and the EMA loop decayed it forever. Fix: create peer entries in NotifyAccepted (when txs are first accepted), not in handleChainHead or checkFinalization. Both chain event handlers now skip peers with no entry — disconnected peers whose entries were deleted by NotifyPeerDrop stay deleted. --- eth/txtracker/tracker.go | 16 ++++++++-------- eth/txtracker/tracker_test.go | 9 ++++++--- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/eth/txtracker/tracker.go b/eth/txtracker/tracker.go index 83d94af33f..842d1776c3 100644 --- a/eth/txtracker/tracker.go +++ b/eth/txtracker/tracker.go @@ -131,6 +131,10 @@ func (t *Tracker) NotifyAccepted(peer string, hashes []common.Hash) { t.txs[hash] = peer t.order = append(t.order, hash) } + // Ensure the delivering peer has a stats entry. + if len(hashes) > 0 && t.peers[peer] == nil { + t.peers[peer] = &peerStats{} + } // Evict oldest entries if over capacity. for len(t.txs) > maxTracked { oldest := t.order[0] @@ -192,12 +196,9 @@ func (t *Tracker) handleChainHead(ev core.ChainHeadEvent) { blockIncl[peer]++ } } - // Ensure peers with inclusions in this block have entries. - for peer := range blockIncl { - if t.peers[peer] == nil { - t.peers[peer] = &peerStats{} - } - } + // Only credit peers that are still tracked (not disconnected). + // Don't create entries for unknown peers — they may have been + // removed by NotifyPeerDrop and should not be resurrected. // Update EMA for all tracked peers (decay inactive ones). for peer, ps := range t.peers { ps.recentIncluded = (1-emaAlpha)*ps.recentIncluded + emaAlpha*float64(blockIncl[peer]) @@ -231,8 +232,7 @@ func (t *Tracker) checkFinalization() { } ps := t.peers[peer] if ps == nil { - ps = &peerStats{} - t.peers[peer] = ps + continue // peer disconnected, skip credit } ps.finalized++ credited++ diff --git a/eth/txtracker/tracker_test.go b/eth/txtracker/tracker_test.go index 5371c7eb2a..22ddabf0e9 100644 --- a/eth/txtracker/tracker_test.go +++ b/eth/txtracker/tracker_test.go @@ -110,10 +110,13 @@ func TestNotifyReceived(t *testing.T) { txs := []*types.Transaction{makeTx(1), makeTx(2), makeTx(3)} tr.NotifyAccepted("peerA", hashTxs(txs)) - // No chain events yet — stats should be empty. + // No chain events yet — peer entry exists but with zero stats. stats := tr.GetAllPeerStats() - if len(stats) != 0 { - t.Fatalf("expected empty stats before any chain events, got %d peers", len(stats)) + if stats["peerA"].Finalized != 0 { + t.Fatalf("expected zero Finalized before chain events, got %d", stats["peerA"].Finalized) + } + if stats["peerA"].RecentIncluded != 0 { + t.Fatalf("expected zero RecentIncluded before chain events, got %f", stats["peerA"].RecentIncluded) } } From df517d1f3a2751591f19688e23d742790d4b200d Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Sat, 11 Apr 2026 16:35:54 +0200 Subject: [PATCH 20/38] eth/txtracker: replace time.Sleep with deterministic step channel Tests used time.Sleep(50ms) to wait for async chain head processing, making the suite slow (~2s) and flaky under CI load. Add a step channel (buffered 1) to the Tracker, sent after each event in the loop. Tests wait on the channel with a 1s timeout. Suite now completes in <10ms. --- eth/txtracker/tracker.go | 6 ++++++ eth/txtracker/tracker_test.go | 35 ++++++++++++++++++++--------------- 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/eth/txtracker/tracker.go b/eth/txtracker/tracker.go index 842d1776c3..b5ebc5ba7e 100644 --- a/eth/txtracker/tracker.go +++ b/eth/txtracker/tracker.go @@ -76,6 +76,7 @@ type Tracker struct { sub event.Subscription quit chan struct{} + step chan struct{} // test sync: sent after each event is processed wg sync.WaitGroup } @@ -85,6 +86,7 @@ func New() *Tracker { txs: make(map[common.Hash]string), peers: make(map[string]*peerStats), quit: make(chan struct{}), + step: make(chan struct{}, 1), } } @@ -171,6 +173,10 @@ func (t *Tracker) loop() { select { case ev := <-t.headCh: t.handleChainHead(ev) + select { + case t.step <- struct{}{}: + default: + } case <-t.sub.Err(): return case <-t.quit: diff --git a/eth/txtracker/tracker_test.go b/eth/txtracker/tracker_test.go index 22ddabf0e9..9274e70d26 100644 --- a/eth/txtracker/tracker_test.go +++ b/eth/txtracker/tracker_test.go @@ -96,9 +96,14 @@ func makeTx(nonce uint64) *types.Transaction { return types.NewTx(&types.LegacyTx{Nonce: nonce, GasPrice: big.NewInt(1), Gas: 21000}) } -// waitForHead gives the tracker time to process a chain head event. -func waitForHead() { - time.Sleep(50 * time.Millisecond) +// waitStep blocks until the tracker has processed one event. +func waitStep(t *testing.T, tr *Tracker) { + t.Helper() + select { + case <-tr.step: + case <-time.After(time.Second): + t.Fatal("timeout waiting for tracker step") + } } func TestNotifyReceived(t *testing.T) { @@ -132,7 +137,7 @@ func TestInclusionEMA(t *testing.T) { // Block 1 includes peerA's tx. chain.addBlock(1, []*types.Transaction{tx}) chain.sendHead(1) - waitForHead() + waitStep(t, tr) stats := tr.GetAllPeerStats() if stats["peerA"].RecentIncluded <= 0 { @@ -143,7 +148,7 @@ func TestInclusionEMA(t *testing.T) { // Block 2 has no txs from peerA — EMA should decay. chain.addBlock(2, nil) chain.sendHead(2) - waitForHead() + waitStep(t, tr) stats = tr.GetAllPeerStats() if stats["peerA"].RecentIncluded >= ema1 { @@ -163,7 +168,7 @@ func TestFinalization(t *testing.T) { // Include in block 1. chain.addBlock(1, []*types.Transaction{tx}) chain.sendHead(1) - waitForHead() + waitStep(t, tr) // Not finalized yet. stats := tr.GetAllPeerStats() @@ -175,7 +180,7 @@ func TestFinalization(t *testing.T) { chain.setFinalBlock(1) chain.addBlock(2, nil) chain.sendHead(2) - waitForHead() + waitStep(t, tr) stats = tr.GetAllPeerStats() if stats["peerA"].Finalized != 1 { @@ -197,13 +202,13 @@ func TestMultiplePeers(t *testing.T) { // Both included in block 1. chain.addBlock(1, []*types.Transaction{tx1, tx2}) chain.sendHead(1) - waitForHead() + waitStep(t, tr) // Finalize. chain.setFinalBlock(1) chain.addBlock(2, nil) chain.sendHead(2) - waitForHead() + waitStep(t, tr) stats := tr.GetAllPeerStats() if stats["peerA"].Finalized != 1 { @@ -226,12 +231,12 @@ func TestFirstDelivererWins(t *testing.T) { chain.addBlock(1, []*types.Transaction{tx}) chain.sendHead(1) - waitForHead() + waitStep(t, tr) chain.setFinalBlock(1) chain.addBlock(2, nil) chain.sendHead(2) - waitForHead() + waitStep(t, tr) stats := tr.GetAllPeerStats() if stats["peerA"].Finalized != 1 { @@ -254,12 +259,12 @@ func TestNoFinalizationCredit(t *testing.T) { // Include but don't finalize. chain.addBlock(1, []*types.Transaction{tx}) chain.sendHead(1) - waitForHead() + waitStep(t, tr) // Send more heads without finalization. chain.addBlock(2, nil) chain.sendHead(2) - waitForHead() + waitStep(t, tr) stats := tr.GetAllPeerStats() if stats["peerA"].Finalized != 0 { @@ -279,13 +284,13 @@ func TestEMADecay(t *testing.T) { // Include in block 1. chain.addBlock(1, []*types.Transaction{tx}) chain.sendHead(1) - waitForHead() + waitStep(t, tr) // Send 30 empty blocks — EMA should decay close to zero. for i := uint64(2); i <= 31; i++ { chain.addBlock(i, nil) chain.sendHead(i) - waitForHead() + waitStep(t, tr) } stats := tr.GetAllPeerStats() From d7fddf418e3638eb9bf6996d3fa22e6495b26ce0 Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Sat, 11 Apr 2026 16:39:15 +0200 Subject: [PATCH 21/38] =?UTF-8?q?eth:=20fix=20lint=20issues=20=E2=80=94=20?= =?UTF-8?q?goimports=20ordering=20and=20fuzzer=20build?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix goimports violations: align callback field comments in tx_fetcher.go, sort import of eth/txtracker after eth/protocols in handler.go. Add missing onAccepted parameter (nil) to the txfetcher fuzzer. --- eth/fetcher/tx_fetcher.go | 10 +++++----- eth/handler.go | 2 +- tests/fuzzers/txfetcher/txfetcher_fuzzer.go | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/eth/fetcher/tx_fetcher.go b/eth/fetcher/tx_fetcher.go index 6e916747da..ba48c6cc8d 100644 --- a/eth/fetcher/tx_fetcher.go +++ b/eth/fetcher/tx_fetcher.go @@ -180,11 +180,11 @@ type TxFetcher struct { alternates map[common.Hash]map[string]struct{} // In-flight transaction alternate origins if retrieval fails // Callbacks - validateMeta func(common.Hash, byte) error // Validate a tx metadata based on the local txpool - addTxs func([]*types.Transaction) []error // Insert a batch of transactions into local txpool - fetchTxs func(string, []common.Hash) error // Retrieves a set of txs from a remote peer - dropPeer func(string) // Drops a peer in case of announcement violation - onAccepted func(peer string, hashes []common.Hash) // Optional: notified with accepted tx hashes per peer + validateMeta func(common.Hash, byte) error // Validate a tx metadata based on the local txpool + addTxs func([]*types.Transaction) []error // Insert a batch of transactions into local txpool + fetchTxs func(string, []common.Hash) error // Retrieves a set of txs from a remote peer + dropPeer func(string) // Drops a peer in case of announcement violation + onAccepted func(peer string, hashes []common.Hash) // Optional: notified with accepted tx hashes per peer step chan struct{} // Notification channel when the fetcher loop iterates clock mclock.Clock // Monotonic clock or simulated clock for tests diff --git a/eth/handler.go b/eth/handler.go index 69469fddd0..d809053533 100644 --- a/eth/handler.go +++ b/eth/handler.go @@ -36,9 +36,9 @@ import ( "github.com/ethereum/go-ethereum/eth/downloader" "github.com/ethereum/go-ethereum/eth/ethconfig" "github.com/ethereum/go-ethereum/eth/fetcher" - "github.com/ethereum/go-ethereum/eth/txtracker" "github.com/ethereum/go-ethereum/eth/protocols/eth" "github.com/ethereum/go-ethereum/eth/protocols/snap" + "github.com/ethereum/go-ethereum/eth/txtracker" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/event" "github.com/ethereum/go-ethereum/log" diff --git a/tests/fuzzers/txfetcher/txfetcher_fuzzer.go b/tests/fuzzers/txfetcher/txfetcher_fuzzer.go index bcceaff383..69674d0e62 100644 --- a/tests/fuzzers/txfetcher/txfetcher_fuzzer.go +++ b/tests/fuzzers/txfetcher/txfetcher_fuzzer.go @@ -84,7 +84,7 @@ func fuzz(input []byte) int { return make([]error, len(txs)) }, func(string, []common.Hash) error { return nil }, - nil, + nil, nil, clock, func() time.Time { nanoTime := int64(clock.Now()) From ed3d5ab3da542abfc5c95adbc8c5ce1d2cc33dda Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Mon, 13 Apr 2026 10:01:26 +0200 Subject: [PATCH 22/38] eth: skip protection work in dropper when pools have headroom When neither the dialed nor inbound peer pool is close to capacity, every non-trusted/non-static peer is already marked do-not-drop by the pool-threshold rules in selectDoNotDrop, so the droppable set is guaranteed empty regardless of inclusion protection. Return early in that case to avoid the wasted peerStatsFunc call, per-direction split, and per-category sort in protectedPeers. --- eth/dropper.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/eth/dropper.go b/eth/dropper.go index eae5344142..bf69e8f0e1 100644 --- a/eth/dropper.go +++ b/eth/dropper.go @@ -164,6 +164,14 @@ func (cm *dropper) dropRandomPeer() bool { } numDialed := len(peers) - numInbound + // Fast path: if neither pool is near capacity, every non-trusted/non-static + // peer is already do-not-drop by pool-threshold rules. No point computing + // inclusion protection. + if cm.maxDialPeers-numDialed > peerDropThreshold && + cm.maxInboundPeers-numInbound > peerDropThreshold { + return false + } + // Compute the set of inclusion-protected peers before filtering. protected := cm.protectedPeers(peers) From 832f2062755c2704705431183d02c576f5d2b889 Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Mon, 13 Apr 2026 10:01:54 +0200 Subject: [PATCH 23/38] eth: split dropper skip metric into total and protection-caused Rename eth/dropper/protected to eth/dropper/skipped and mark it on every skip (fast-path headroom, all candidates un-droppable, or protection emptied the list). Add eth/dropper/skipped_protected to count the subset of skips where at least one otherwise-droppable peer was kept only because of inclusion protection. The pair lets operators see both the total churn-miss rate and how often peer protection specifically is the cause. --- eth/dropper.go | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/eth/dropper.go b/eth/dropper.go index bf69e8f0e1..75eea891ca 100644 --- a/eth/dropper.go +++ b/eth/dropper.go @@ -52,9 +52,14 @@ var ( droppedInbound = metrics.NewRegisteredMeter("eth/dropper/inbound", nil) // droppedOutbound is the number of outbound peers dropped droppedOutbound = metrics.NewRegisteredMeter("eth/dropper/outbound", nil) - // dropSkipped counts times a drop was skipped because all - // droppable candidates were protected by inclusion stats. - dropSkipped = metrics.NewRegisteredMeter("eth/dropper/protected", nil) + // dropSkipped counts times a drop was attempted but no peer was dropped, + // for any reason (pool has headroom, all candidates trusted/static/young, + // or protected by inclusion stats). + dropSkipped = metrics.NewRegisteredMeter("eth/dropper/skipped", nil) + // dropSkippedProtected counts the subset of skips caused specifically by + // inclusion-based peer protection: at least one otherwise-droppable peer + // was kept only because it was in the protected set. + dropSkippedProtected = metrics.NewRegisteredMeter("eth/dropper/skipped_protected", nil) ) // PeerInclusionStats holds the per-peer inclusion data needed by the dropper @@ -169,24 +174,33 @@ func (cm *dropper) dropRandomPeer() bool { // inclusion protection. if cm.maxDialPeers-numDialed > peerDropThreshold && cm.maxInboundPeers-numInbound > peerDropThreshold { + dropSkipped.Mark(1) return false } // Compute the set of inclusion-protected peers before filtering. protected := cm.protectedPeers(peers) - selectDoNotDrop := func(p *p2p.Peer) bool { + baseNotDrop := func(p *p2p.Peer) bool { return p.Trusted() || p.StaticDialed() || p.Lifetime() < mclock.AbsTime(doNotDropBefore) || (p.DynDialed() && cm.maxDialPeers-numDialed > peerDropThreshold) || - (p.Inbound() && cm.maxInboundPeers-numInbound > peerDropThreshold) || - protected[p] + (p.Inbound() && cm.maxInboundPeers-numInbound > peerDropThreshold) + } + selectDoNotDrop := func(p *p2p.Peer) bool { + return baseNotDrop(p) || protected[p] } droppable := slices.DeleteFunc(peers, selectDoNotDrop) if len(droppable) == 0 { - if len(protected) > 0 { - dropSkipped.Mark(1) + dropSkipped.Mark(1) + // If any protected peer would otherwise have been droppable, + // protection was the cause of the skip. + for p := range protected { + if !baseNotDrop(p) { + dropSkippedProtected.Mark(1) + break + } } return false } From 5014761cf1160cec2baa7586d97da0910c8f7c79 Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Mon, 13 Apr 2026 16:56:09 +0200 Subject: [PATCH 24/38] eth/txtracker: strengthen TestNotifyReceived assertions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The original assertions read stats["peerA"].Finalized and .RecentIncluded, both of which return zero for a missing key — so the test would pass even if NotifyAccepted were a complete no-op, contradicting its stated purpose. Assert that exactly one peer entry exists, peerA is present, and the internal txs map and FIFO order slice are populated as NotifyAccepted is meant to do. --- eth/txtracker/tracker_test.go | 38 +++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/eth/txtracker/tracker_test.go b/eth/txtracker/tracker_test.go index 9274e70d26..7b337a9c69 100644 --- a/eth/txtracker/tracker_test.go +++ b/eth/txtracker/tracker_test.go @@ -113,15 +113,41 @@ func TestNotifyReceived(t *testing.T) { defer tr.Stop() txs := []*types.Transaction{makeTx(1), makeTx(2), makeTx(3)} - tr.NotifyAccepted("peerA", hashTxs(txs)) + hashes := hashTxs(txs) + tr.NotifyAccepted("peerA", hashes) - // No chain events yet — peer entry exists but with zero stats. + // Public surface: peer entry was created with zero stats before any + // chain events. Map lookups would return a zero value for a missing + // key, so assert presence explicitly. stats := tr.GetAllPeerStats() - if stats["peerA"].Finalized != 0 { - t.Fatalf("expected zero Finalized before chain events, got %d", stats["peerA"].Finalized) + if len(stats) != 1 { + t.Fatalf("expected 1 peer entry, got %d", len(stats)) } - if stats["peerA"].RecentIncluded != 0 { - t.Fatalf("expected zero RecentIncluded before chain events, got %f", stats["peerA"].RecentIncluded) + ps, ok := stats["peerA"] + if !ok { + t.Fatal("expected peerA entry, not found") + } + if ps.Finalized != 0 || ps.RecentIncluded != 0 { + t.Fatalf("expected zero stats before chain events, got %+v", ps) + } + + // Internal state: all tx→deliverer mappings recorded, insertion order + // preserved in the FIFO slice. + tr.mu.Lock() + defer tr.mu.Unlock() + if len(tr.txs) != 3 { + t.Fatalf("expected 3 tracked txs, got %d", len(tr.txs)) + } + if len(tr.order) != 3 { + t.Fatalf("expected order length 3, got %d", len(tr.order)) + } + for i, h := range hashes { + if got := tr.txs[h]; got != "peerA" { + t.Fatalf("tx %d: expected deliverer=peerA, got %q", i, got) + } + if tr.order[i] != h { + t.Fatalf("order[%d] mismatch", i) + } } } From 72f8ef6f69df557a415f321d6ecf35d55cf7dfda Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Mon, 13 Apr 2026 16:56:37 +0200 Subject: [PATCH 25/38] eth/txtracker: test reorg safety of head block lookup handleChainHead resolves the head block via GetBlock(hash, number) so that a stale head event after a reorg cannot credit transactions from the wrong block. The existing mockChain ignored the hash argument, so a regression to GetBlockByNumber would have gone undetected. Make mockChain hash-aware: store blocks keyed by hash with a separate canonical-by-number index for the finalization path, and have sendHead emit the real block's hash. Add TestReorgSafety with two blocks at the same height to exercise the hash selector directly. --- eth/txtracker/tracker_test.go | 117 ++++++++++++++++++++++++++++++---- 1 file changed, 103 insertions(+), 14 deletions(-) diff --git a/eth/txtracker/tracker_test.go b/eth/txtracker/tracker_test.go index 7b337a9c69..013a0068b8 100644 --- a/eth/txtracker/tracker_test.go +++ b/eth/txtracker/tracker_test.go @@ -30,15 +30,24 @@ import ( ) // mockChain implements the Chain interface for testing. +// +// Blocks are stored by hash to exercise the reorg-safe lookup path in +// tracker.handleChainHead (which calls GetBlock(hash, number)). A separate +// canonicalByNum index maps each height to its canonical block hash, used +// by GetBlockByNumber (the finalization path). type mockChain struct { - mu sync.Mutex - headFeed event.Feed - blocks map[uint64]*types.Block - finalNum uint64 + mu sync.Mutex + headFeed event.Feed + blocksByHash map[common.Hash]*types.Block + canonicalByNum map[uint64]common.Hash + finalNum uint64 } func newMockChain() *mockChain { - return &mockChain{blocks: make(map[uint64]*types.Block)} + return &mockChain{ + blocksByHash: make(map[common.Hash]*types.Block), + canonicalByNum: make(map[uint64]common.Hash), + } } func (c *mockChain) SubscribeChainHeadEvent(ch chan<- core.ChainHeadEvent) event.Subscription { @@ -48,12 +57,17 @@ func (c *mockChain) SubscribeChainHeadEvent(ch chan<- core.ChainHeadEvent) event func (c *mockChain) GetBlockByNumber(number uint64) *types.Block { c.mu.Lock() defer c.mu.Unlock() - return c.blocks[number] + hash, ok := c.canonicalByNum[number] + if !ok { + return nil + } + return c.blocksByHash[hash] } func (c *mockChain) GetBlock(hash common.Hash, number uint64) *types.Block { - // In tests we only key by number; ignore hash. - return c.GetBlockByNumber(number) + c.mu.Lock() + defer c.mu.Unlock() + return c.blocksByHash[hash] } func (c *mockChain) CurrentFinalBlock() *types.Header { @@ -65,11 +79,30 @@ func (c *mockChain) CurrentFinalBlock() *types.Header { return &types.Header{Number: new(big.Int).SetUint64(c.finalNum)} } -func (c *mockChain) addBlock(num uint64, txs []*types.Transaction) { +// addBlock adds a canonical block at the given height. Overwrites any +// prior canonical block at that height. +func (c *mockChain) addBlock(num uint64, txs []*types.Transaction) *types.Block { + return c.addBlockAtHeight(num, num, txs, true) +} + +// addBlockAtHeight adds a block at the given height. The salt parameter +// ensures distinct block hashes for two blocks at the same height (used +// for reorg tests). If canonical is true, the block becomes the canonical +// block for that height (looked up by GetBlockByNumber). +func (c *mockChain) addBlockAtHeight(num, salt uint64, txs []*types.Transaction, canonical bool) *types.Block { c.mu.Lock() defer c.mu.Unlock() - header := &types.Header{Number: new(big.Int).SetUint64(num)} - c.blocks[num] = types.NewBlock(header, &types.Body{Transactions: txs}, nil, trie.NewListHasher()) + // Mix salt into Extra so siblings at the same height get distinct hashes. + header := &types.Header{ + Number: new(big.Int).SetUint64(num), + Extra: big.NewInt(int64(salt)).Bytes(), + } + block := types.NewBlock(header, &types.Body{Transactions: txs}, nil, trie.NewListHasher()) + c.blocksByHash[block.Hash()] = block + if canonical { + c.canonicalByNum[num] = block.Hash() + } + return block } func (c *mockChain) setFinalBlock(num uint64) { @@ -78,10 +111,24 @@ func (c *mockChain) setFinalBlock(num uint64) { c.finalNum = num } +// sendHead emits a chain head event for the canonical block at the given +// height. The emitted header carries the real block's hash so the +// tracker's GetBlock(hash, number) lookup resolves correctly. func (c *mockChain) sendHead(num uint64) { - c.headFeed.Send(core.ChainHeadEvent{ - Header: &types.Header{Number: new(big.Int).SetUint64(num)}, - }) + c.mu.Lock() + hash := c.canonicalByNum[num] + block := c.blocksByHash[hash] + c.mu.Unlock() + if block == nil { + panic("sendHead: no canonical block at height") + } + c.headFeed.Send(core.ChainHeadEvent{Header: block.Header()}) +} + +// sendHeadBlock emits a chain head event for the given block (may be +// non-canonical). Used for reorg tests. +func (c *mockChain) sendHeadBlock(block *types.Block) { + c.headFeed.Send(core.ChainHeadEvent{Header: block.Header()}) } func hashTxs(txs []*types.Transaction) []common.Hash { @@ -324,3 +371,45 @@ func TestEMADecay(t *testing.T) { t.Fatalf("expected RecentIncluded near zero after 30 empty blocks, got %f", stats["peerA"].RecentIncluded) } } + +// TestReorgSafety verifies that handleChainHead resolves the head block by +// HASH (not just by number), so a head event announcing a sibling block at +// the same height does not credit transactions from the canonical block. +// +// Regression check: if the tracker were changed to use GetBlockByNumber, +// it would always fetch the canonical block A and credit peerA even when +// the head points to sibling B. +func TestReorgSafety(t *testing.T) { + tr := New() + chain := newMockChain() + tr.Start(chain) + defer tr.Stop() + + tx := makeTx(1) + tr.NotifyAccepted("peerA", []common.Hash{tx.Hash()}) + + // Two blocks at height 1: canonical A contains tx; sibling B does not. + blockA := chain.addBlockAtHeight(1, 1, []*types.Transaction{tx}, true) + blockB := chain.addBlockAtHeight(1, 2, nil, false) + if blockA.Hash() == blockB.Hash() { + t.Fatal("sibling blocks ended up with the same hash") + } + + // Head announces sibling B. A hash-aware tracker fetches B, sees no + // peerA txs, and leaves the EMA at zero. A number-only tracker would + // instead fetch A and credit peerA. + chain.sendHeadBlock(blockB) + waitStep(t, tr) + + if got := tr.GetAllPeerStats()["peerA"].RecentIncluded; got != 0 { + t.Fatalf("expected RecentIncluded=0 after sibling-B head event, got %f (tracker followed the wrong block)", got) + } + + // Now announce canonical A; peerA should be credited. + chain.sendHeadBlock(blockA) + waitStep(t, tr) + + if got := tr.GetAllPeerStats()["peerA"].RecentIncluded; got <= 0 { + t.Fatalf("expected RecentIncluded>0 after canonical-A head event, got %f", got) + } +} From a7ce1e2ad8651c2cfb39d448ff2f1a33113d830c Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Mon, 13 Apr 2026 16:56:53 +0200 Subject: [PATCH 26/38] eth: test per-pool top-N selection in dropper peer protection The protection feature promises top-N per inbound/dialed pool, but every existing test constructed peers via p2p.NewPeer (which produces no-flag peers), so all test peers landed in the dialed pool and the per-pool split was never validated. Extract the selection logic from protectedPeers into a pure helper protectedPeersByPool(inbound, dialed, stats) that accepts pre-split pools. This sidesteps the unexported p2p.connFlag types and makes the interesting behavior directly testable. Add three tests covering: - exact top-N selected independently in each pool - cross-category union with overlap deduplication - per-pool independence: top dialed peers stay protected even when every inbound peer scores higher globally --- eth/dropper.go | 18 ++++++-- eth/dropper_test.go | 109 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+), 4 deletions(-) diff --git a/eth/dropper.go b/eth/dropper.go index 75eea891ca..741ca5cbd2 100644 --- a/eth/dropper.go +++ b/eth/dropper.go @@ -236,8 +236,21 @@ func (cm *dropper) protectedPeers(peers []*p2p.Peer) map[*p2p.Peer]bool { dialed = append(dialed, p) } } - // protectPool selects the top-frac peers from pool by score and adds them to result. + result := protectedPeersByPool(inbound, dialed, stats) + if len(result) > 0 { + log.Debug("Protecting high-value peers from drop", "protected", len(result)) + } + return result +} + +// protectedPeersByPool selects the union of top-N peers per protection +// category across the given already-split inbound and dialed pools. +// Factored from protectedPeers so tests can exercise the per-pool +// selection logic without needing to construct direction-flagged +// *p2p.Peer instances (which require unexported p2p types). +func protectedPeersByPool(inbound, dialed []*p2p.Peer, stats map[string]PeerInclusionStats) map[*p2p.Peer]bool { result := make(map[*p2p.Peer]bool) + // protectPool selects the top-frac peers from pool by score and adds them to result. protectPool := func(pool []*p2p.Peer, score func(*p2p.Peer) float64, frac float64) { n := int(float64(len(pool)) * frac) if n == 0 { @@ -260,9 +273,6 @@ func (cm *dropper) protectedPeers(peers []*p2p.Peer) map[*p2p.Peer]bool { protectPool(inbound, score, cat.frac) protectPool(dialed, score, cat.frac) } - if len(result) > 0 { - log.Debug("Protecting high-value peers from drop", "protected", len(result)) - } return result } diff --git a/eth/dropper_test.go b/eth/dropper_test.go index ba1eb66faf..ea764c9ab3 100644 --- a/eth/dropper_test.go +++ b/eth/dropper_test.go @@ -122,3 +122,112 @@ func TestProtectedPeersNilFunc(t *testing.T) { t.Fatalf("expected nil with nil stats func, got %v", protected) } } + +// TestProtectedByPoolPerPoolTopN verifies that the top-N selection runs +// independently in each of the inbound and dialed pools, not globally. +// With 10 peers per pool and inclusionProtectionFrac=0.1, exactly 1 peer +// is protected per pool per category — so 2 total (one per pool), both +// for the Finalized category since we don't set RecentIncluded. +func TestProtectedByPoolPerPoolTopN(t *testing.T) { + inbound := makePeers(10) + dialed := makePeers(10) + // Distinguish dialed peer IDs from inbound so stats maps don't collide. + for i := range dialed { + id := enode.ID{byte(100 + i)} + dialed[i] = p2p.NewPeer(id, fmt.Sprintf("dialed%d", i), nil) + } + // Strictly increasing scores: highest wins in each pool. + stats := make(map[string]PeerInclusionStats) + for i, p := range inbound { + stats[p.ID().String()] = PeerInclusionStats{Finalized: int64(1 + i)} + } + for i, p := range dialed { + stats[p.ID().String()] = PeerInclusionStats{Finalized: int64(1 + i)} + } + + protected := protectedPeersByPool(inbound, dialed, stats) + + // Expect top 1 of inbound (inbound[9]) and top 1 of dialed (dialed[9]). + if len(protected) != 2 { + t.Fatalf("expected 2 protected peers (1 per pool), got %d", len(protected)) + } + if !protected[inbound[9]] { + t.Error("expected top inbound peer to be protected") + } + if !protected[dialed[9]] { + t.Error("expected top dialed peer to be protected") + } +} + +// TestProtectedByPoolCrossCategoryOverlap verifies that the union across +// protection categories is correctly deduplicated: a peer that wins in +// multiple categories appears once, and category winners are all +// protected. Uses a pool large enough that frac*len yields n=2 per +// category, so cross-category overlap is observable. +func TestProtectedByPoolCrossCategoryOverlap(t *testing.T) { + // 20 dialed peers so 0.1 * 20 = 2 protected per category. + dialed := makePeers(20) + // P0: high Finalized only. P1: high RecentIncluded only. P2: high both. + // With n=2 per category: + // Finalized winners: P2 (tie-broken-ok), P0 + // RecentIncluded winners: P2, P1 + // Union: {P0, P1, P2}. + stats := make(map[string]PeerInclusionStats) + stats[dialed[0].ID().String()] = PeerInclusionStats{Finalized: 100, RecentIncluded: 0} + stats[dialed[1].ID().String()] = PeerInclusionStats{Finalized: 0, RecentIncluded: 5.0} + stats[dialed[2].ID().String()] = PeerInclusionStats{Finalized: 200, RecentIncluded: 10.0} + + protected := protectedPeersByPool(nil, dialed, stats) + + if len(protected) != 3 { + t.Fatalf("expected 3 protected peers (union of category winners), got %d", len(protected)) + } + for _, idx := range []int{0, 1, 2} { + if !protected[dialed[idx]] { + t.Errorf("peer %d should be protected", idx) + } + } +} + +// TestProtectedByPoolPerPoolIndependence locks in that selection runs +// per-pool, not globally. Every inbound peer scores higher than every +// dialed peer, so a global top-N would pick only inbound peers. Per-pool +// top-N must still protect the top dialed peers. +func TestProtectedByPoolPerPoolIndependence(t *testing.T) { + // 20 inbound, 20 dialed — frac=0.1 → 2 protected per pool per category. + // Global top-4 of Finalized would be inbound[16..19] — zero dialed. + inbound := makePeers(20) + dialed := make([]*p2p.Peer, 20) + for i := range dialed { + id := enode.ID{byte(100 + i)} + dialed[i] = p2p.NewPeer(id, fmt.Sprintf("dialed%d", i), nil) + } + stats := make(map[string]PeerInclusionStats) + // Every inbound peer outscores every dialed peer. + for i, p := range inbound { + stats[p.ID().String()] = PeerInclusionStats{Finalized: int64(1000 + i)} + } + for i, p := range dialed { + stats[p.ID().String()] = PeerInclusionStats{Finalized: int64(1 + i)} + } + + protected := protectedPeersByPool(inbound, dialed, stats) + + // Per-pool top-2 of Finalized: + // inbound: inbound[18], inbound[19] + // dialed: dialed[18], dialed[19] + // Global top-N would contain zero dialed peers, so asserting the top + // dialed peers are protected enforces per-pool independence. + if !protected[dialed[19]] { + t.Fatal("top dialed peer must be protected regardless of globally-higher inbound peers") + } + if !protected[dialed[18]] { + t.Fatal("second-top dialed peer must be protected regardless of globally-higher inbound peers") + } + if !protected[inbound[19]] || !protected[inbound[18]] { + t.Fatal("top inbound peers must also be protected") + } + if len(protected) != 4 { + t.Fatalf("expected 4 protected peers (top-2 of each pool), got %d", len(protected)) + } +} From 69a7baefd8986a2d651cedf1ca9a74f94a5f690b Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Mon, 13 Apr 2026 19:13:12 +0200 Subject: [PATCH 27/38] eth: drop skipped_protected metric and simplify dropper skip path The skipped_protected metric (added earlier on this branch) counted the subset of drop skips where inclusion protection was the cause. The signal can be inferred from rising dropSkipped rate plus the existing "Protecting high-value peers" debug log, which wasn't worth the second metric, the causality-check loop over the protected set, and the baseNotDrop closure extracted solely to share the predicate. Collapse baseNotDrop back into selectDoNotDrop and remove the metric. dropSkipped still fires on every skip (fast-path headroom + all-filtered). --- eth/dropper.go | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/eth/dropper.go b/eth/dropper.go index 741ca5cbd2..d3eb718e6a 100644 --- a/eth/dropper.go +++ b/eth/dropper.go @@ -56,10 +56,6 @@ var ( // for any reason (pool has headroom, all candidates trusted/static/young, // or protected by inclusion stats). dropSkipped = metrics.NewRegisteredMeter("eth/dropper/skipped", nil) - // dropSkippedProtected counts the subset of skips caused specifically by - // inclusion-based peer protection: at least one otherwise-droppable peer - // was kept only because it was in the protected set. - dropSkippedProtected = metrics.NewRegisteredMeter("eth/dropper/skipped_protected", nil) ) // PeerInclusionStats holds the per-peer inclusion data needed by the dropper @@ -181,27 +177,17 @@ func (cm *dropper) dropRandomPeer() bool { // Compute the set of inclusion-protected peers before filtering. protected := cm.protectedPeers(peers) - baseNotDrop := func(p *p2p.Peer) bool { + selectDoNotDrop := func(p *p2p.Peer) bool { return p.Trusted() || p.StaticDialed() || p.Lifetime() < mclock.AbsTime(doNotDropBefore) || (p.DynDialed() && cm.maxDialPeers-numDialed > peerDropThreshold) || - (p.Inbound() && cm.maxInboundPeers-numInbound > peerDropThreshold) - } - selectDoNotDrop := func(p *p2p.Peer) bool { - return baseNotDrop(p) || protected[p] + (p.Inbound() && cm.maxInboundPeers-numInbound > peerDropThreshold) || + protected[p] } droppable := slices.DeleteFunc(peers, selectDoNotDrop) if len(droppable) == 0 { dropSkipped.Mark(1) - // If any protected peer would otherwise have been droppable, - // protection was the cause of the skip. - for p := range protected { - if !baseNotDrop(p) { - dropSkippedProtected.Mark(1) - break - } - } return false } p := droppable[mrand.Intn(len(droppable))] From 1f2ebc5d5989556fa36ed2e8ad34e76a75dda133 Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Wed, 15 Apr 2026 14:35:37 +0200 Subject: [PATCH 28/38] eth: drop PeerInclusionStats wrapper and use txtracker.PeerStats directly PeerInclusionStats was declared identically to txtracker.PeerStats as a decoupling abstraction: any stats provider could implement the dropper's callback by returning this shape. In practice there's one provider and the two types were kept in sync by a rote copy adapter in backend.go. Delete PeerInclusionStats, have the dropper consume txtracker.PeerStats directly via getPeerStatsFunc. backend.go now passes txTracker.GetAllPeerStats as the callback with no adapter. If a second stats provider ever appears, the abstraction can come back; until then, one fewer type and 8 fewer lines of ceremony. --- eth/backend.go | 9 +-------- eth/dropper.go | 23 ++++++++-------------- eth/dropper_test.go | 47 +++++++++++++++++++++++---------------------- 3 files changed, 33 insertions(+), 46 deletions(-) diff --git a/eth/backend.go b/eth/backend.go index d5ec34e21d..551506f6f3 100644 --- a/eth/backend.go +++ b/eth/backend.go @@ -463,14 +463,7 @@ func (s *Ethereum) Start() error { s.handler.txTracker.Start(s.blockchain) // Start the connection manager with inclusion-based peer protection. - s.dropper.Start(s.p2pServer, func() bool { return !s.Synced() }, func() map[string]PeerInclusionStats { - stats := s.handler.txTracker.GetAllPeerStats() - result := make(map[string]PeerInclusionStats, len(stats)) - for id, ps := range stats { - result[id] = PeerInclusionStats{Finalized: ps.Finalized, RecentIncluded: ps.RecentIncluded} - } - return result - }) + s.dropper.Start(s.p2pServer, func() bool { return !s.Synced() }, s.handler.txTracker.GetAllPeerStats) // start log indexer s.filterMaps.Start() diff --git a/eth/dropper.go b/eth/dropper.go index d3eb718e6a..591e3c4277 100644 --- a/eth/dropper.go +++ b/eth/dropper.go @@ -25,6 +25,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common/mclock" + "github.com/ethereum/go-ethereum/eth/txtracker" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/p2p" @@ -58,30 +59,22 @@ var ( dropSkipped = metrics.NewRegisteredMeter("eth/dropper/skipped", nil) ) -// PeerInclusionStats holds the per-peer inclusion data needed by the dropper -// to decide which peers to protect. Any stats provider (e.g. txtracker) can -// implement getPeerInclusionStatsFunc by returning this struct per peer ID. -type PeerInclusionStats struct { - Finalized int64 // Cumulative finalized inclusions attributed to this peer - RecentIncluded float64 // EMA of per-block inclusions (0 if not tracked) -} - // Callback type to get per-peer inclusion statistics. -type getPeerInclusionStatsFunc func() map[string]PeerInclusionStats +type getPeerStatsFunc func() map[string]txtracker.PeerStats // protectionCategory defines a peer scoring function and the fraction of peers // to protect per inbound/dialed category. Multiple categories are unioned. type protectionCategory struct { name string - score func(PeerInclusionStats) float64 + score func(txtracker.PeerStats) float64 frac float64 // fraction of max peers to protect (0.0–1.0) } // protectionCategories is the list of protection criteria. Each category // independently selects its top-N peers per pool; the union is protected. var protectionCategories = []protectionCategory{ - {"total-finalized", func(s PeerInclusionStats) float64 { return float64(s.Finalized) }, inclusionProtectionFrac}, - {"recent-included", func(s PeerInclusionStats) float64 { return s.RecentIncluded }, inclusionProtectionFrac}, + {"total-finalized", func(s txtracker.PeerStats) float64 { return float64(s.Finalized) }, inclusionProtectionFrac}, + {"recent-included", func(s txtracker.PeerStats) float64 { return s.RecentIncluded }, inclusionProtectionFrac}, } // dropper monitors the state of the peer pool and introduces churn by @@ -107,7 +100,7 @@ type dropper struct { maxInboundPeers int // maximum number of inbound peers peersFunc getPeersFunc syncingFunc getSyncingFunc - peerStatsFunc getPeerInclusionStatsFunc // optional: inclusion stats for protection + peerStatsFunc getPeerStatsFunc // optional: inclusion stats for protection // peerDropTimer introduces churn if we are close to limit capacity. // We handle Dialed and Inbound connections separately @@ -139,7 +132,7 @@ func newDropper(maxDialPeers, maxInboundPeers int) *dropper { // Start the dropper. peerStatsFunc is optional (nil disables inclusion // protection). -func (cm *dropper) Start(srv *p2p.Server, syncingFunc getSyncingFunc, peerStatsFunc getPeerInclusionStatsFunc) { +func (cm *dropper) Start(srv *p2p.Server, syncingFunc getSyncingFunc, peerStatsFunc getPeerStatsFunc) { cm.peersFunc = srv.Peers cm.syncingFunc = syncingFunc cm.peerStatsFunc = peerStatsFunc @@ -234,7 +227,7 @@ func (cm *dropper) protectedPeers(peers []*p2p.Peer) map[*p2p.Peer]bool { // Factored from protectedPeers so tests can exercise the per-pool // selection logic without needing to construct direction-flagged // *p2p.Peer instances (which require unexported p2p types). -func protectedPeersByPool(inbound, dialed []*p2p.Peer, stats map[string]PeerInclusionStats) map[*p2p.Peer]bool { +func protectedPeersByPool(inbound, dialed []*p2p.Peer, stats map[string]txtracker.PeerStats) map[*p2p.Peer]bool { result := make(map[*p2p.Peer]bool) // protectPool selects the top-frac peers from pool by score and adds them to result. protectPool := func(pool []*p2p.Peer, score func(*p2p.Peer) float64, frac float64) { diff --git a/eth/dropper_test.go b/eth/dropper_test.go index ea764c9ab3..a85eb0011b 100644 --- a/eth/dropper_test.go +++ b/eth/dropper_test.go @@ -20,6 +20,7 @@ import ( "fmt" "testing" + "github.com/ethereum/go-ethereum/eth/txtracker" "github.com/ethereum/go-ethereum/p2p" "github.com/ethereum/go-ethereum/p2p/enode" ) @@ -35,7 +36,7 @@ func makePeers(n int) []*p2p.Peer { func TestProtectedPeersNoStats(t *testing.T) { cm := &dropper{maxDialPeers: 20, maxInboundPeers: 30} - cm.peerStatsFunc = func() map[string]PeerInclusionStats { return nil } + cm.peerStatsFunc = func() map[string]txtracker.PeerStats { return nil } peers := makePeers(10) protected := cm.protectedPeers(peers) @@ -46,8 +47,8 @@ func TestProtectedPeersNoStats(t *testing.T) { func TestProtectedPeersEmptyStats(t *testing.T) { cm := &dropper{maxDialPeers: 20, maxInboundPeers: 30} - cm.peerStatsFunc = func() map[string]PeerInclusionStats { - return map[string]PeerInclusionStats{} + cm.peerStatsFunc = func() map[string]txtracker.PeerStats { + return map[string]txtracker.PeerStats{} } peers := makePeers(10) @@ -62,11 +63,11 @@ func TestProtectedPeersTopPeer(t *testing.T) { cm := &dropper{maxDialPeers: 20, maxInboundPeers: 30} peers := makePeers(20) - stats := make(map[string]PeerInclusionStats) - stats[peers[0].ID().String()] = PeerInclusionStats{Finalized: 100} - stats[peers[1].ID().String()] = PeerInclusionStats{RecentIncluded: 5.0} + stats := make(map[string]txtracker.PeerStats) + stats[peers[0].ID().String()] = txtracker.PeerStats{Finalized: 100} + stats[peers[1].ID().String()] = txtracker.PeerStats{RecentIncluded: 5.0} - cm.peerStatsFunc = func() map[string]PeerInclusionStats { return stats } + cm.peerStatsFunc = func() map[string]txtracker.PeerStats { return stats } protected := cm.protectedPeers(peers) if len(protected) != 2 { @@ -84,11 +85,11 @@ func TestProtectedPeersZeroScore(t *testing.T) { cm := &dropper{maxDialPeers: 20, maxInboundPeers: 30} peers := makePeers(10) - stats := make(map[string]PeerInclusionStats) + stats := make(map[string]txtracker.PeerStats) for _, p := range peers { - stats[p.ID().String()] = PeerInclusionStats{} + stats[p.ID().String()] = txtracker.PeerStats{} } - cm.peerStatsFunc = func() map[string]PeerInclusionStats { return stats } + cm.peerStatsFunc = func() map[string]txtracker.PeerStats { return stats } protected := cm.protectedPeers(peers) if len(protected) != 0 { @@ -101,10 +102,10 @@ func TestProtectedPeersOverlap(t *testing.T) { cm := &dropper{maxDialPeers: 20, maxInboundPeers: 30} peers := makePeers(20) - stats := make(map[string]PeerInclusionStats) - stats[peers[0].ID().String()] = PeerInclusionStats{Finalized: 100, RecentIncluded: 5.0} + stats := make(map[string]txtracker.PeerStats) + stats[peers[0].ID().String()] = txtracker.PeerStats{Finalized: 100, RecentIncluded: 5.0} - cm.peerStatsFunc = func() map[string]PeerInclusionStats { return stats } + cm.peerStatsFunc = func() map[string]txtracker.PeerStats { return stats } protected := cm.protectedPeers(peers) if len(protected) != 1 { @@ -137,12 +138,12 @@ func TestProtectedByPoolPerPoolTopN(t *testing.T) { dialed[i] = p2p.NewPeer(id, fmt.Sprintf("dialed%d", i), nil) } // Strictly increasing scores: highest wins in each pool. - stats := make(map[string]PeerInclusionStats) + stats := make(map[string]txtracker.PeerStats) for i, p := range inbound { - stats[p.ID().String()] = PeerInclusionStats{Finalized: int64(1 + i)} + stats[p.ID().String()] = txtracker.PeerStats{Finalized: int64(1 + i)} } for i, p := range dialed { - stats[p.ID().String()] = PeerInclusionStats{Finalized: int64(1 + i)} + stats[p.ID().String()] = txtracker.PeerStats{Finalized: int64(1 + i)} } protected := protectedPeersByPool(inbound, dialed, stats) @@ -172,10 +173,10 @@ func TestProtectedByPoolCrossCategoryOverlap(t *testing.T) { // Finalized winners: P2 (tie-broken-ok), P0 // RecentIncluded winners: P2, P1 // Union: {P0, P1, P2}. - stats := make(map[string]PeerInclusionStats) - stats[dialed[0].ID().String()] = PeerInclusionStats{Finalized: 100, RecentIncluded: 0} - stats[dialed[1].ID().String()] = PeerInclusionStats{Finalized: 0, RecentIncluded: 5.0} - stats[dialed[2].ID().String()] = PeerInclusionStats{Finalized: 200, RecentIncluded: 10.0} + stats := make(map[string]txtracker.PeerStats) + stats[dialed[0].ID().String()] = txtracker.PeerStats{Finalized: 100, RecentIncluded: 0} + stats[dialed[1].ID().String()] = txtracker.PeerStats{Finalized: 0, RecentIncluded: 5.0} + stats[dialed[2].ID().String()] = txtracker.PeerStats{Finalized: 200, RecentIncluded: 10.0} protected := protectedPeersByPool(nil, dialed, stats) @@ -202,13 +203,13 @@ func TestProtectedByPoolPerPoolIndependence(t *testing.T) { id := enode.ID{byte(100 + i)} dialed[i] = p2p.NewPeer(id, fmt.Sprintf("dialed%d", i), nil) } - stats := make(map[string]PeerInclusionStats) + stats := make(map[string]txtracker.PeerStats) // Every inbound peer outscores every dialed peer. for i, p := range inbound { - stats[p.ID().String()] = PeerInclusionStats{Finalized: int64(1000 + i)} + stats[p.ID().String()] = txtracker.PeerStats{Finalized: int64(1000 + i)} } for i, p := range dialed { - stats[p.ID().String()] = PeerInclusionStats{Finalized: int64(1 + i)} + stats[p.ID().String()] = txtracker.PeerStats{Finalized: int64(1 + i)} } protected := protectedPeersByPool(inbound, dialed, stats) From f24161de71624a375ee01088efbc5b8b90c42824 Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Sun, 19 Apr 2026 12:14:23 +0200 Subject: [PATCH 29/38] eth/txtracker: replace cumulative Finalized with slow RecentFinalized EMA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The total-finalized protection category ranked peers by a monotonic cumulative count, so a peer that had been productive in the past kept a high score forever — even if they had since gone silent — and held a protected slot without contributing. Replace txtracker.PeerStats.Finalized (int64 cumulative) with RecentFinalized (float64 EMA). On each chain head, finalization credits accumulated over the newly-finalized range are folded into a slow EMA (alpha=0.0001, half-life ~6930 blocks ≈ 23 hours on 12s mainnet blocks). Peers that continue contributing keep a high score; peers that stop decay toward zero over roughly a day. The dropper category renames to "recent-finalized" accordingly. The type's docstring is rewritten to describe both categories as EMAs with different time horizons (slow finalized, fast included). Refactors checkFinalization to return a per-peer credits map rather than mutating state directly, so both EMAs update in the same loop over tracked peers. --- eth/dropper.go | 16 ++++---- eth/dropper_test.go | 30 +++++++------- eth/txtracker/tracker.go | 66 +++++++++++++++++++------------ eth/txtracker/tracker_test.go | 73 +++++++++++++++++++++++++++-------- 4 files changed, 122 insertions(+), 63 deletions(-) diff --git a/eth/dropper.go b/eth/dropper.go index 591e3c4277..3855439ca5 100644 --- a/eth/dropper.go +++ b/eth/dropper.go @@ -73,7 +73,7 @@ type protectionCategory struct { // protectionCategories is the list of protection criteria. Each category // independently selects its top-N peers per pool; the union is protected. var protectionCategories = []protectionCategory{ - {"total-finalized", func(s txtracker.PeerStats) float64 { return float64(s.Finalized) }, inclusionProtectionFrac}, + {"recent-finalized", func(s txtracker.PeerStats) float64 { return s.RecentFinalized }, inclusionProtectionFrac}, {"recent-included", func(s txtracker.PeerStats) float64 { return s.RecentIncluded }, inclusionProtectionFrac}, } @@ -89,12 +89,14 @@ var protectionCategories = []protectionCategory{ // - Trusted and static peers are never dropped. // - Recently connected peers are also protected from dropping to give them time // to prove their value before being at risk of disconnection. -// - Some peers are protected from dropping based on their role. This is not based -// on a unified score function, but rather on the concept of protected peer pools. -// Each pool independently selects its top fraction of peers by a specific score -// (e.g. total finalized inclusions, recent inclusion EMA); the union of all -// protected sets is shielded from random dropping, and the drop target is chosen -// randomly from the remainder. +// - Some peers are protected from dropping based on their contribution +// to the tx pool. Each pool (inbound/dialed) independently selects its +// top fraction of peers by a per-peer EMA score — a slow EMA of +// finalized inclusions (~1-day half-life, rewards sustained long-term +// contribution) and a fast EMA of recent block inclusions (rewards +// current activity). The union of all protected sets is shielded from +// random dropping, and the drop target is chosen randomly from the +// remainder. type dropper struct { maxDialPeers int // maximum number of dialed peers maxInboundPeers int // maximum number of inbound peers diff --git a/eth/dropper_test.go b/eth/dropper_test.go index a85eb0011b..fd2ed9b611 100644 --- a/eth/dropper_test.go +++ b/eth/dropper_test.go @@ -64,7 +64,7 @@ func TestProtectedPeersTopPeer(t *testing.T) { peers := makePeers(20) stats := make(map[string]txtracker.PeerStats) - stats[peers[0].ID().String()] = txtracker.PeerStats{Finalized: 100} + stats[peers[0].ID().String()] = txtracker.PeerStats{RecentFinalized: 100} stats[peers[1].ID().String()] = txtracker.PeerStats{RecentIncluded: 5.0} cm.peerStatsFunc = func() map[string]txtracker.PeerStats { return stats } @@ -74,7 +74,7 @@ func TestProtectedPeersTopPeer(t *testing.T) { t.Fatalf("expected 2 protected peers, got %d", len(protected)) } if !protected[peers[0]] { - t.Fatal("peer 0 should be protected (top Finalized)") + t.Fatal("peer 0 should be protected (top RecentFinalized)") } if !protected[peers[1]] { t.Fatal("peer 1 should be protected (top RecentIncluded)") @@ -103,7 +103,7 @@ func TestProtectedPeersOverlap(t *testing.T) { peers := makePeers(20) stats := make(map[string]txtracker.PeerStats) - stats[peers[0].ID().String()] = txtracker.PeerStats{Finalized: 100, RecentIncluded: 5.0} + stats[peers[0].ID().String()] = txtracker.PeerStats{RecentFinalized: 100, RecentIncluded: 5.0} cm.peerStatsFunc = func() map[string]txtracker.PeerStats { return stats } @@ -128,7 +128,7 @@ func TestProtectedPeersNilFunc(t *testing.T) { // independently in each of the inbound and dialed pools, not globally. // With 10 peers per pool and inclusionProtectionFrac=0.1, exactly 1 peer // is protected per pool per category — so 2 total (one per pool), both -// for the Finalized category since we don't set RecentIncluded. +// for the RecentFinalized category since we don't set RecentIncluded. func TestProtectedByPoolPerPoolTopN(t *testing.T) { inbound := makePeers(10) dialed := makePeers(10) @@ -140,10 +140,10 @@ func TestProtectedByPoolPerPoolTopN(t *testing.T) { // Strictly increasing scores: highest wins in each pool. stats := make(map[string]txtracker.PeerStats) for i, p := range inbound { - stats[p.ID().String()] = txtracker.PeerStats{Finalized: int64(1 + i)} + stats[p.ID().String()] = txtracker.PeerStats{RecentFinalized: float64(1 + i)} } for i, p := range dialed { - stats[p.ID().String()] = txtracker.PeerStats{Finalized: int64(1 + i)} + stats[p.ID().String()] = txtracker.PeerStats{RecentFinalized: float64(1 + i)} } protected := protectedPeersByPool(inbound, dialed, stats) @@ -168,15 +168,15 @@ func TestProtectedByPoolPerPoolTopN(t *testing.T) { func TestProtectedByPoolCrossCategoryOverlap(t *testing.T) { // 20 dialed peers so 0.1 * 20 = 2 protected per category. dialed := makePeers(20) - // P0: high Finalized only. P1: high RecentIncluded only. P2: high both. + // P0: high RecentFinalized only. P1: high RecentIncluded only. P2: high both. // With n=2 per category: - // Finalized winners: P2 (tie-broken-ok), P0 + // RecentFinalized winners: P2 (tie-broken-ok), P0 // RecentIncluded winners: P2, P1 // Union: {P0, P1, P2}. stats := make(map[string]txtracker.PeerStats) - stats[dialed[0].ID().String()] = txtracker.PeerStats{Finalized: 100, RecentIncluded: 0} - stats[dialed[1].ID().String()] = txtracker.PeerStats{Finalized: 0, RecentIncluded: 5.0} - stats[dialed[2].ID().String()] = txtracker.PeerStats{Finalized: 200, RecentIncluded: 10.0} + stats[dialed[0].ID().String()] = txtracker.PeerStats{RecentFinalized: 100, RecentIncluded: 0} + stats[dialed[1].ID().String()] = txtracker.PeerStats{RecentFinalized: 0, RecentIncluded: 5.0} + stats[dialed[2].ID().String()] = txtracker.PeerStats{RecentFinalized: 200, RecentIncluded: 10.0} protected := protectedPeersByPool(nil, dialed, stats) @@ -196,7 +196,7 @@ func TestProtectedByPoolCrossCategoryOverlap(t *testing.T) { // top-N must still protect the top dialed peers. func TestProtectedByPoolPerPoolIndependence(t *testing.T) { // 20 inbound, 20 dialed — frac=0.1 → 2 protected per pool per category. - // Global top-4 of Finalized would be inbound[16..19] — zero dialed. + // Global top-4 of RecentFinalized would be inbound[16..19] — zero dialed. inbound := makePeers(20) dialed := make([]*p2p.Peer, 20) for i := range dialed { @@ -206,15 +206,15 @@ func TestProtectedByPoolPerPoolIndependence(t *testing.T) { stats := make(map[string]txtracker.PeerStats) // Every inbound peer outscores every dialed peer. for i, p := range inbound { - stats[p.ID().String()] = txtracker.PeerStats{Finalized: int64(1000 + i)} + stats[p.ID().String()] = txtracker.PeerStats{RecentFinalized: float64(1000 + i)} } for i, p := range dialed { - stats[p.ID().String()] = txtracker.PeerStats{Finalized: int64(1 + i)} + stats[p.ID().String()] = txtracker.PeerStats{RecentFinalized: float64(1 + i)} } protected := protectedPeersByPool(inbound, dialed, stats) - // Per-pool top-2 of Finalized: + // Per-pool top-2 of RecentFinalized: // inbound: inbound[18], inbound[19] // dialed: dialed[18], dialed[19] // Global top-N would contain zero dialed peers, so asserting the top diff --git a/eth/txtracker/tracker.go b/eth/txtracker/tracker.go index b5ebc5ba7e..93797cad9d 100644 --- a/eth/txtracker/tracker.go +++ b/eth/txtracker/tracker.go @@ -41,12 +41,17 @@ const ( maxTracked = 262144 // EMA smoothing factor for per-block inclusion rate. emaAlpha = 0.05 + // EMA smoothing factor for per-block finalization rate. Very slow on + // purpose: finalization is permanent, and the score should reflect + // sustained contribution over long windows, not recent bursts. + // Half-life ≈ 6930 chain heads (~23 hours on 12s blocks). + finalizedEMAAlpha = 0.0001 ) // PeerStats holds the per-peer inclusion data. type PeerStats struct { - Finalized int64 // Cumulative finalized inclusions attributed to this peer - RecentIncluded float64 // EMA of per-block inclusions (at chain head time) + RecentFinalized float64 // EMA of per-block finalization credits (slow) + RecentIncluded float64 // EMA of per-block inclusions (fast) } // Chain is the blockchain interface needed by the tracker. @@ -58,8 +63,8 @@ type Chain interface { } type peerStats struct { - finalized int64 - recentIncluded float64 + recentFinalized float64 + recentIncluded float64 } // Tracker records which peer delivered each transaction and credits peers @@ -159,8 +164,8 @@ func (t *Tracker) GetAllPeerStats() map[string]PeerStats { result := make(map[string]PeerStats, len(t.peers)) for id, ps := range t.peers { result[id] = PeerStats{ - Finalized: ps.finalized, - RecentIncluded: ps.recentIncluded, + RecentFinalized: ps.recentFinalized, + RecentIncluded: ps.recentIncluded, } } return result @@ -195,37 +200,41 @@ func (t *Tracker) handleChainHead(ev core.ChainHeadEvent) { t.mu.Lock() defer t.mu.Unlock() - // Count per-peer inclusions in this block for the EMA. + // Count per-peer inclusions in this block for the inclusion EMA. blockIncl := make(map[string]int) for _, tx := range block.Transactions() { if peer := t.txs[tx.Hash()]; peer != "" { blockIncl[peer]++ } } - // Only credit peers that are still tracked (not disconnected). + // Accumulate per-peer finalization credits over the newly-finalized + // range (possibly zero blocks). Only counts peers still tracked. + blockFinal := t.collectFinalizationCredits() + + // Update both EMAs for all tracked peers (decays inactive ones). // Don't create entries for unknown peers — they may have been // removed by NotifyPeerDrop and should not be resurrected. - // Update EMA for all tracked peers (decay inactive ones). for peer, ps := range t.peers { ps.recentIncluded = (1-emaAlpha)*ps.recentIncluded + emaAlpha*float64(blockIncl[peer]) + ps.recentFinalized = (1-finalizedEMAAlpha)*ps.recentFinalized + finalizedEMAAlpha*float64(blockFinal[peer]) } - // Check if the finalized block has advanced. - t.checkFinalization() } -// checkFinalization credits peers for transactions in newly finalized blocks. -// Must be called with t.mu held. -func (t *Tracker) checkFinalization() { +// collectFinalizationCredits accumulates per-peer finalization credits for +// blocks newly finalized since lastFinalNum. Returns a (possibly empty) map +// keyed by peer ID; advances lastFinalNum. Must be called with t.mu held. +// Peers that have already been removed by NotifyPeerDrop are skipped so +// dropped peers are not resurrected by old on-chain data. +func (t *Tracker) collectFinalizationCredits() map[string]int { + credits := make(map[string]int) finalHeader := t.chain.CurrentFinalBlock() if finalHeader == nil { - return + return credits } finalNum := finalHeader.Number.Uint64() if finalNum <= t.lastFinalNum { - return + return credits } - // Credit peers for all blocks from lastFinalNum+1 to finalNum. - var credited int for num := t.lastFinalNum + 1; num <= finalNum; num++ { block := t.chain.GetBlockByNumber(num) if block == nil { @@ -236,17 +245,24 @@ func (t *Tracker) checkFinalization() { if peer == "" { continue } - ps := t.peers[peer] - if ps == nil { + if _, ok := t.peers[peer]; !ok { continue // peer disconnected, skip credit } - ps.finalized++ - credited++ + credits[peer]++ } } - if credited > 0 { - log.Trace("Credited peers for finalized inclusions", - "from", t.lastFinalNum+1, "to", finalNum, "txs", credited) + if total := sumCounts(credits); total > 0 { + log.Trace("Accumulated finalization credits", + "from", t.lastFinalNum+1, "to", finalNum, "txs", total) } t.lastFinalNum = finalNum + return credits +} + +func sumCounts(m map[string]int) int { + var sum int + for _, v := range m { + sum += v + } + return sum } diff --git a/eth/txtracker/tracker_test.go b/eth/txtracker/tracker_test.go index 013a0068b8..b572597e6e 100644 --- a/eth/txtracker/tracker_test.go +++ b/eth/txtracker/tracker_test.go @@ -174,7 +174,7 @@ func TestNotifyReceived(t *testing.T) { if !ok { t.Fatal("expected peerA entry, not found") } - if ps.Finalized != 0 || ps.RecentIncluded != 0 { + if ps.RecentFinalized != 0 || ps.RecentIncluded != 0 { t.Fatalf("expected zero stats before chain events, got %+v", ps) } @@ -245,19 +245,19 @@ func TestFinalization(t *testing.T) { // Not finalized yet. stats := tr.GetAllPeerStats() - if stats["peerA"].Finalized != 0 { - t.Fatalf("expected Finalized=0 before finalization, got %d", stats["peerA"].Finalized) + if stats["peerA"].RecentFinalized != 0 { + t.Fatalf("expected RecentFinalized=0 before finalization, got %f", stats["peerA"].RecentFinalized) } - // Finalize block 1, then send head 2 to trigger checkFinalization. + // Finalize block 1, then send head 2 to trigger the finalization EMA update. chain.setFinalBlock(1) chain.addBlock(2, nil) chain.sendHead(2) waitStep(t, tr) stats = tr.GetAllPeerStats() - if stats["peerA"].Finalized != 1 { - t.Fatalf("expected Finalized=1 after finalization, got %d", stats["peerA"].Finalized) + if stats["peerA"].RecentFinalized <= 0 { + t.Fatalf("expected RecentFinalized>0 after finalization, got %f", stats["peerA"].RecentFinalized) } } @@ -284,11 +284,11 @@ func TestMultiplePeers(t *testing.T) { waitStep(t, tr) stats := tr.GetAllPeerStats() - if stats["peerA"].Finalized != 1 { - t.Fatalf("peerA: expected Finalized=1, got %d", stats["peerA"].Finalized) + if stats["peerA"].RecentFinalized <= 0 { + t.Fatalf("peerA: expected RecentFinalized>0, got %f", stats["peerA"].RecentFinalized) } - if stats["peerB"].Finalized != 1 { - t.Fatalf("peerB: expected Finalized=1, got %d", stats["peerB"].Finalized) + if stats["peerB"].RecentFinalized <= 0 { + t.Fatalf("peerB: expected RecentFinalized>0, got %f", stats["peerB"].RecentFinalized) } } @@ -312,11 +312,11 @@ func TestFirstDelivererWins(t *testing.T) { waitStep(t, tr) stats := tr.GetAllPeerStats() - if stats["peerA"].Finalized != 1 { - t.Fatalf("peerA should be credited, got Finalized=%d", stats["peerA"].Finalized) + if stats["peerA"].RecentFinalized <= 0 { + t.Fatalf("peerA should be credited, got RecentFinalized=%f", stats["peerA"].RecentFinalized) } - if stats["peerB"].Finalized != 0 { - t.Fatalf("peerB should NOT be credited, got Finalized=%d", stats["peerB"].Finalized) + if stats["peerB"].RecentFinalized != 0 { + t.Fatalf("peerB should NOT be credited, got RecentFinalized=%f", stats["peerB"].RecentFinalized) } } @@ -340,8 +340,8 @@ func TestNoFinalizationCredit(t *testing.T) { waitStep(t, tr) stats := tr.GetAllPeerStats() - if stats["peerA"].Finalized != 0 { - t.Fatalf("expected Finalized=0 without finalization, got %d", stats["peerA"].Finalized) + if stats["peerA"].RecentFinalized != 0 { + t.Fatalf("expected RecentFinalized=0 without finalization, got %f", stats["peerA"].RecentFinalized) } } @@ -413,3 +413,44 @@ func TestReorgSafety(t *testing.T) { t.Fatalf("expected RecentIncluded>0 after canonical-A head event, got %f", got) } } + +// TestRecentFinalizedDecays verifies that the finalization EMA decays +// for a peer that earned credits in the past but has no new +// finalization activity. The decay is slow (α=0.0001), so we +// just assert monotonic decrease, not convergence to zero. +func TestRecentFinalizedDecays(t *testing.T) { + tr := New() + chain := newMockChain() + tr.Start(chain) + defer tr.Stop() + + tx := makeTx(1) + tr.NotifyAccepted("peerA", []common.Hash{tx.Hash()}) + + // Include and finalize in block 1. + chain.addBlock(1, []*types.Transaction{tx}) + chain.sendHead(1) + waitStep(t, tr) + chain.setFinalBlock(1) + chain.addBlock(2, nil) + chain.sendHead(2) + waitStep(t, tr) + + peak := tr.GetAllPeerStats()["peerA"].RecentFinalized + if peak <= 0 { + t.Fatalf("expected RecentFinalized>0 after finalization, got %f", peak) + } + + // Send many empty heads — peer contributes zero each block, + // EMA should decay monotonically. + for i := uint64(3); i <= 50; i++ { + chain.addBlock(i, nil) + chain.sendHead(i) + waitStep(t, tr) + } + + after := tr.GetAllPeerStats()["peerA"].RecentFinalized + if after >= peak { + t.Fatalf("expected RecentFinalized to decay, got %f >= peak %f", after, peak) + } +} From 111d90aef84b99b67fc9a815c1f4bf0b63f07657 Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Mon, 13 Apr 2026 20:25:53 +0200 Subject: [PATCH 30/38] eth/txtracker: track per-peer tx-request response latency Adds NotifyRequestLatency(peer, latency) and a slow per-peer EMA (alpha=0.01, ~70-sample half-life) that the dropper will use as a new protection signal. The first sample seeds the EMA directly so fresh peers don't ramp up from zero. RequestSamples is exposed alongside the EMA so consumers can apply a minimum-samples bootstrap guard before trusting the value. Includes design notes for the broader peerdrop-latency feature. --- eth/txtracker/tracker.go | 56 ++++++++++++++++--- eth/txtracker/tracker_test.go | 102 ++++++++++++++++++++++++++++++++++ 2 files changed, 151 insertions(+), 7 deletions(-) diff --git a/eth/txtracker/tracker.go b/eth/txtracker/tracker.go index 93797cad9d..8c678c6561 100644 --- a/eth/txtracker/tracker.go +++ b/eth/txtracker/tracker.go @@ -28,6 +28,7 @@ package txtracker import ( "sync" + "time" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core" @@ -46,12 +47,22 @@ const ( // sustained contribution over long windows, not recent bursts. // Half-life ≈ 6930 chain heads (~23 hours on 12s blocks). finalizedEMAAlpha = 0.0001 + // EMA smoothing factor for per-request latency average. Slow on purpose: + // short bursts shouldn't shift the score, sustained behavior should. + // Half-life ≈ ln(0.5)/ln(0.99) ≈ 69 samples. + latencyEMAAlpha = 0.01 + // MinLatencySamples is the number of latency samples a peer must accumulate + // before its RequestLatencyEMA is considered meaningful for protection. + // Prevents a single lucky-fast reply from displacing established peers. + MinLatencySamples = 10 ) -// PeerStats holds the per-peer inclusion data. +// PeerStats holds the per-peer inclusion and responsiveness data. type PeerStats struct { - RecentFinalized float64 // EMA of per-block finalization credits (slow) - RecentIncluded float64 // EMA of per-block inclusions (fast) + RecentFinalized float64 // EMA of per-block finalization credits (slow) + RecentIncluded float64 // EMA of per-block inclusions (fast) + RequestLatencyEMA time.Duration // Slow EMA of tx-request response latency (timeouts count as the timeout value) + RequestSamples int64 // Number of latency samples seen for this peer } // Chain is the blockchain interface needed by the tracker. @@ -63,8 +74,10 @@ type Chain interface { } type peerStats struct { - recentFinalized float64 - recentIncluded float64 + recentFinalized float64 + recentIncluded float64 + requestLatencyEMA time.Duration + requestSamples int64 } // Tracker records which peer delivered each transaction and credits peers @@ -155,6 +168,33 @@ func (t *Tracker) NotifyAccepted(peer string, hashes []common.Hash) { } } +// NotifyRequestLatency records a tx-request response latency sample for the +// given peer. Timeouts should be reported as the timeout value (so they count +// against the EMA rather than being silently omitted). The EMA uses a slow +// alpha so isolated bursts don't shift the score appreciably. +// Safe to call from any goroutine. +func (t *Tracker) NotifyRequestLatency(peer string, latency time.Duration) { + t.mu.Lock() + defer t.mu.Unlock() + + ps := t.peers[peer] + if ps == nil { + ps = &peerStats{} + t.peers[peer] = ps + } + if ps.requestSamples == 0 { + // Bootstrap the EMA with the first sample so it doesn't drift up + // from zero over many samples before reaching realistic values. + ps.requestLatencyEMA = latency + } else { + ps.requestLatencyEMA = time.Duration( + float64(ps.requestLatencyEMA)*(1-latencyEMAAlpha) + + float64(latency)*latencyEMAAlpha, + ) + } + ps.requestSamples++ +} + // GetAllPeerStats returns a snapshot of per-peer inclusion statistics. // Safe to call from any goroutine. func (t *Tracker) GetAllPeerStats() map[string]PeerStats { @@ -164,8 +204,10 @@ func (t *Tracker) GetAllPeerStats() map[string]PeerStats { result := make(map[string]PeerStats, len(t.peers)) for id, ps := range t.peers { result[id] = PeerStats{ - RecentFinalized: ps.recentFinalized, - RecentIncluded: ps.recentIncluded, + RecentFinalized: ps.recentFinalized, + RecentIncluded: ps.recentIncluded, + RequestLatencyEMA: ps.requestLatencyEMA, + RequestSamples: ps.requestSamples, } } return result diff --git a/eth/txtracker/tracker_test.go b/eth/txtracker/tracker_test.go index b572597e6e..280694b273 100644 --- a/eth/txtracker/tracker_test.go +++ b/eth/txtracker/tracker_test.go @@ -454,3 +454,105 @@ func TestRecentFinalizedDecays(t *testing.T) { t.Fatalf("expected RecentFinalized to decay, got %f >= peak %f", after, peak) } } + +// TestRequestLatencyFirstSampleBootstrap asserts that the first latency +// sample seeds the EMA directly (no slow ramp-up from zero), and that the +// sample counter starts at 1. +func TestRequestLatencyFirstSampleBootstrap(t *testing.T) { + tr := New() + tr.NotifyRequestLatency("peerA", 200*time.Millisecond) + + stats := tr.GetAllPeerStats() + ps := stats["peerA"] + if ps.RequestLatencyEMA != 200*time.Millisecond { + t.Fatalf("expected first sample to seed EMA at 200ms, got %v", ps.RequestLatencyEMA) + } + if ps.RequestSamples != 1 { + t.Fatalf("expected RequestSamples=1, got %d", ps.RequestSamples) + } +} + +// TestRequestLatencyEMAUpdate verifies the EMA formula (1-α)·old + α·new. +func TestRequestLatencyEMAUpdate(t *testing.T) { + tr := New() + tr.NotifyRequestLatency("peerA", 100*time.Millisecond) + tr.NotifyRequestLatency("peerA", 1000*time.Millisecond) + + // Expected: 0.99*100ms + 0.01*1000ms = 109ms + got := tr.GetAllPeerStats()["peerA"].RequestLatencyEMA + want := 109 * time.Millisecond + delta := got - want + if delta < 0 { + delta = -delta + } + if delta > 1*time.Microsecond { + t.Fatalf("EMA mismatch: got %v, want %v (delta %v)", got, want, delta) + } + if samples := tr.GetAllPeerStats()["peerA"].RequestSamples; samples != 2 { + t.Fatalf("expected RequestSamples=2, got %d", samples) + } +} + +// TestRequestLatencySlowEMAConvergence verifies that the slow alpha +// requires many samples to noticeably shift the EMA. Starting at 100ms +// and feeding 5s (timeout) samples, the EMA should still be well below +// 1s after 50 samples. +func TestRequestLatencySlowEMAConvergence(t *testing.T) { + tr := New() + tr.NotifyRequestLatency("peerA", 100*time.Millisecond) + for i := 0; i < 50; i++ { + tr.NotifyRequestLatency("peerA", 5*time.Second) + } + got := tr.GetAllPeerStats()["peerA"].RequestLatencyEMA + if got < 1*time.Second { + // Expected ≈ (0.99)^50 * 100ms + (1-(0.99)^50) * 5s ≈ 1.99s + // The lower bound proves a meaningful shift; the upper bound (below) + // proves the slow alpha damped the convergence. + t.Fatalf("EMA did not move enough under sustained timeouts, got %v", got) + } + if got > 3*time.Second { + t.Fatalf("EMA converged too fast for slow alpha=0.01, got %v", got) + } +} + +// TestRequestLatencyMultiplePeersIsolated verifies per-peer isolation: a +// sample for peerA does not affect peerB's stats. +func TestRequestLatencyMultiplePeersIsolated(t *testing.T) { + tr := New() + tr.NotifyRequestLatency("peerA", 100*time.Millisecond) + tr.NotifyRequestLatency("peerB", 5*time.Second) + + stats := tr.GetAllPeerStats() + if stats["peerA"].RequestLatencyEMA != 100*time.Millisecond { + t.Errorf("peerA EMA: got %v, want 100ms", stats["peerA"].RequestLatencyEMA) + } + if stats["peerB"].RequestLatencyEMA != 5*time.Second { + t.Errorf("peerB EMA: got %v, want 5s", stats["peerB"].RequestLatencyEMA) + } + if stats["peerA"].RequestSamples != 1 || stats["peerB"].RequestSamples != 1 { + t.Errorf("expected RequestSamples=1 for each peer, got A=%d B=%d", + stats["peerA"].RequestSamples, stats["peerB"].RequestSamples) + } +} + +// TestRequestLatencyPeerDropResetsStats verifies that NotifyPeerDrop +// removes the peer's latency history along with its other stats. +func TestRequestLatencyPeerDropResetsStats(t *testing.T) { + tr := New() + tr.NotifyRequestLatency("peerA", 200*time.Millisecond) + tr.NotifyPeerDrop("peerA") + + if _, ok := tr.GetAllPeerStats()["peerA"]; ok { + t.Fatal("peerA stats should be removed after NotifyPeerDrop") + } + + // A subsequent latency sample re-creates the entry as a fresh peer. + tr.NotifyRequestLatency("peerA", 50*time.Millisecond) + ps := tr.GetAllPeerStats()["peerA"] + if ps.RequestSamples != 1 { + t.Fatalf("expected RequestSamples=1 after re-add, got %d", ps.RequestSamples) + } + if ps.RequestLatencyEMA != 50*time.Millisecond { + t.Fatalf("expected fresh EMA bootstrap, got %v", ps.RequestLatencyEMA) + } +} From b0a266d3c89ad18d4e3e21186b15fa06291d1343 Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Mon, 13 Apr 2026 21:05:45 +0200 Subject: [PATCH 31/38] eth/fetcher: add onRequestLatency callback to tx fetcher Adds an optional onRequestLatency(peer, latency) callback to the tx fetcher constructor, fired exactly once per request: - On in-time delivery: the actual round-trip latency (clock.Now - req.time). - On timeout (req.time + txFetchTimeout exceeded): the timeout value itself, so slow peers contribute samples instead of being silently omitted from the downstream EMA. Late deliveries for requests already counted as timeouts do not double-record. Existing callers (handler.go, fuzzer, tests) pass nil for the new parameter; handler wiring to txTracker follows in a separate commit. --- eth/fetcher/tx_fetcher.go | 77 ++++++++------ eth/fetcher/tx_fetcher_test.go | 105 +++++++++++++++++++- eth/handler.go | 2 +- tests/fuzzers/txfetcher/txfetcher_fuzzer.go | 2 +- 4 files changed, 152 insertions(+), 34 deletions(-) diff --git a/eth/fetcher/tx_fetcher.go b/eth/fetcher/tx_fetcher.go index ba48c6cc8d..ffed7d5acf 100644 --- a/eth/fetcher/tx_fetcher.go +++ b/eth/fetcher/tx_fetcher.go @@ -180,11 +180,12 @@ type TxFetcher struct { alternates map[common.Hash]map[string]struct{} // In-flight transaction alternate origins if retrieval fails // Callbacks - validateMeta func(common.Hash, byte) error // Validate a tx metadata based on the local txpool - addTxs func([]*types.Transaction) []error // Insert a batch of transactions into local txpool - fetchTxs func(string, []common.Hash) error // Retrieves a set of txs from a remote peer - dropPeer func(string) // Drops a peer in case of announcement violation - onAccepted func(peer string, hashes []common.Hash) // Optional: notified with accepted tx hashes per peer + validateMeta func(common.Hash, byte) error // Validate a tx metadata based on the local txpool + addTxs func([]*types.Transaction) []error // Insert a batch of transactions into local txpool + fetchTxs func(string, []common.Hash) error // Retrieves a set of txs from a remote peer + dropPeer func(string) // Drops a peer in case of announcement violation + onAccepted func(peer string, hashes []common.Hash) // Optional: notified with accepted tx hashes per peer + onRequestLatency func(peer string, latency time.Duration) // Optional: notified once per completed/timed-out tx request step chan struct{} // Notification channel when the fetcher loop iterates clock mclock.Clock // Monotonic clock or simulated clock for tests @@ -195,40 +196,41 @@ type TxFetcher struct { // NewTxFetcher creates a transaction fetcher to retrieve transaction // based on hash announcements. // Chain can be nil to disable on-chain checks. -func NewTxFetcher(chain *core.BlockChain, validateMeta func(common.Hash, byte) error, addTxs func([]*types.Transaction) []error, fetchTxs func(string, []common.Hash) error, dropPeer func(string), onAccepted func(string, []common.Hash)) *TxFetcher { - return NewTxFetcherForTests(chain, validateMeta, addTxs, fetchTxs, dropPeer, onAccepted, mclock.System{}, time.Now, nil) +func NewTxFetcher(chain *core.BlockChain, validateMeta func(common.Hash, byte) error, addTxs func([]*types.Transaction) []error, fetchTxs func(string, []common.Hash) error, dropPeer func(string), onAccepted func(string, []common.Hash), onRequestLatency func(string, time.Duration)) *TxFetcher { + return NewTxFetcherForTests(chain, validateMeta, addTxs, fetchTxs, dropPeer, onAccepted, onRequestLatency, mclock.System{}, time.Now, nil) } // NewTxFetcherForTests is a testing method to mock out the realtime clock with // a simulated version and the internal randomness with a deterministic one. // Chain can be nil to disable on-chain checks. func NewTxFetcherForTests( - chain *core.BlockChain, validateMeta func(common.Hash, byte) error, addTxs func([]*types.Transaction) []error, fetchTxs func(string, []common.Hash) error, dropPeer func(string), onAccepted func(string, []common.Hash), + chain *core.BlockChain, validateMeta func(common.Hash, byte) error, addTxs func([]*types.Transaction) []error, fetchTxs func(string, []common.Hash) error, dropPeer func(string), onAccepted func(string, []common.Hash), onRequestLatency func(string, time.Duration), clock mclock.Clock, realTime func() time.Time, rand *mrand.Rand) *TxFetcher { return &TxFetcher{ - notify: make(chan *txAnnounce), - cleanup: make(chan *txDelivery), - drop: make(chan *txDrop), - quit: make(chan struct{}), - waitlist: make(map[common.Hash]map[string]struct{}), - waittime: make(map[common.Hash]mclock.AbsTime), - waitslots: make(map[string]map[common.Hash]*txMetadataWithSeq), - announces: make(map[string]map[common.Hash]*txMetadataWithSeq), - announced: make(map[common.Hash]map[string]struct{}), - fetching: make(map[common.Hash]string), - requests: make(map[string]*txRequest), - alternates: make(map[common.Hash]map[string]struct{}), - underpriced: lru.NewCache[common.Hash, time.Time](maxTxUnderpricedSetSize), - txOnChainCache: lru.NewCache[common.Hash, struct{}](txOnChainCacheLimit), - chain: chain, - validateMeta: validateMeta, - addTxs: addTxs, - fetchTxs: fetchTxs, - dropPeer: dropPeer, - onAccepted: onAccepted, - clock: clock, - realTime: realTime, - rand: rand, + notify: make(chan *txAnnounce), + cleanup: make(chan *txDelivery), + drop: make(chan *txDrop), + quit: make(chan struct{}), + waitlist: make(map[common.Hash]map[string]struct{}), + waittime: make(map[common.Hash]mclock.AbsTime), + waitslots: make(map[string]map[common.Hash]*txMetadataWithSeq), + announces: make(map[string]map[common.Hash]*txMetadataWithSeq), + announced: make(map[common.Hash]map[string]struct{}), + fetching: make(map[common.Hash]string), + requests: make(map[string]*txRequest), + alternates: make(map[common.Hash]map[string]struct{}), + underpriced: lru.NewCache[common.Hash, time.Time](maxTxUnderpricedSetSize), + txOnChainCache: lru.NewCache[common.Hash, struct{}](txOnChainCacheLimit), + chain: chain, + validateMeta: validateMeta, + addTxs: addTxs, + fetchTxs: fetchTxs, + dropPeer: dropPeer, + onAccepted: onAccepted, + onRequestLatency: onRequestLatency, + clock: clock, + realTime: realTime, + rand: rand, } } @@ -673,6 +675,14 @@ func (f *TxFetcher) loop() { // Keep track of the request as dangling, but never expire f.requests[peer].hashes = nil txFetcherSlowPeers.Inc(1) + // Record the request as a timeout-latency sample. The slow + // EMA in the consumer counts timeouts as the timeout value + // itself, so a peer that times out repeatedly drags its + // score down without us having to wait for an eventual + // (possibly never-arriving) reply. + if f.onRequestLatency != nil { + f.onRequestLatency(peer, txFetchTimeout) + } } } // Schedule a new transaction retrieval @@ -769,6 +779,11 @@ func (f *TxFetcher) loop() { if req.hashes == nil { txFetcherSlowPeers.Dec(1) txFetcherSlowWait.Update(time.Duration(f.clock.Now() - req.time).Nanoseconds()) + // Already counted as a timeout sample at the timeout site; + // don't double-record on eventual delivery. + } else if f.onRequestLatency != nil { + // Normal in-time delivery. Record the actual round-trip. + f.onRequestLatency(delivery.origin, time.Duration(f.clock.Now()-req.time)) } delete(f.requests, delivery.origin) diff --git a/eth/fetcher/tx_fetcher_test.go b/eth/fetcher/tx_fetcher_test.go index 3fe11fda21..92f3979acb 100644 --- a/eth/fetcher/tx_fetcher_test.go +++ b/eth/fetcher/tx_fetcher_test.go @@ -22,6 +22,7 @@ import ( "math/big" "math/rand" "slices" + "sync" "testing" "time" @@ -97,7 +98,7 @@ func newTestTxFetcher() *TxFetcher { return make([]error, len(txs)) }, func(string, []common.Hash) error { return nil }, - nil, nil, + nil, nil, nil, ) } @@ -2204,6 +2205,7 @@ func TestTransactionForgotten(t *testing.T) { func(string, []common.Hash) error { return nil }, func(string) {}, nil, + nil, mockClock, mockTime, rand.New(rand.NewSource(0)), // Use fixed seed for deterministic behavior @@ -2284,3 +2286,104 @@ func TestTransactionForgotten(t *testing.T) { t.Errorf("wrong final underpriced cache size: got %d, want 1", size) } } + +// latencyRecorder is a thread-safe recorder for onRequestLatency callbacks. +type latencyRecorder struct { + mu sync.Mutex + samples []latencySample +} + +type latencySample struct { + peer string + latency time.Duration +} + +func (r *latencyRecorder) record(peer string, latency time.Duration) { + r.mu.Lock() + defer r.mu.Unlock() + r.samples = append(r.samples, latencySample{peer, latency}) +} + +func (r *latencyRecorder) snapshot() []latencySample { + r.mu.Lock() + defer r.mu.Unlock() + out := make([]latencySample, len(r.samples)) + copy(out, r.samples) + return out +} + +// TestTransactionFetcherRequestLatencyOnDelivery asserts that an in-time +// direct delivery of a requested batch fires the onRequestLatency callback +// exactly once with the actual round-trip latency. +func TestTransactionFetcherRequestLatencyOnDelivery(t *testing.T) { + rec := &latencyRecorder{} + testTransactionFetcherParallel(t, txFetcherTest{ + init: func() *TxFetcher { + f := newTestTxFetcher() + f.onRequestLatency = rec.record + return f + }, + steps: []interface{}{ + doTxNotify{peer: "A", hashes: []common.Hash{testTxsHashes[0]}, types: []byte{testTxs[0].Type()}, sizes: []uint32{uint32(testTxs[0].Size())}}, + // Wait for the announce-arrival timer; request is dispatched at this point. + doWait{time: txArriveTimeout, step: true}, + // Simulate 200ms round-trip before the response arrives. + doWait{time: 200 * time.Millisecond, step: false}, + doTxEnqueue{peer: "A", txs: []*types.Transaction{testTxs[0]}, direct: true}, + doFunc(func() { + samples := rec.snapshot() + if len(samples) != 1 { + t.Fatalf("expected 1 latency sample, got %d (%v)", len(samples), samples) + } + if samples[0].peer != "A" { + t.Errorf("peer mismatch: got %q, want A", samples[0].peer) + } + if samples[0].latency != 200*time.Millisecond { + t.Errorf("latency mismatch: got %v, want 200ms", samples[0].latency) + } + }), + }, + }) +} + +// TestTransactionFetcherRequestLatencyOnTimeout asserts that when a request +// times out (no reply within txFetchTimeout), onRequestLatency fires once +// with the timeout value, and a subsequent (late) delivery does not fire +// a duplicate sample. +func TestTransactionFetcherRequestLatencyOnTimeout(t *testing.T) { + rec := &latencyRecorder{} + testTransactionFetcherParallel(t, txFetcherTest{ + init: func() *TxFetcher { + f := newTestTxFetcher() + f.onRequestLatency = rec.record + return f + }, + steps: []interface{}{ + doTxNotify{peer: "A", hashes: []common.Hash{testTxsHashes[0]}, types: []byte{testTxs[0].Type()}, sizes: []uint32{uint32(testTxs[0].Size())}}, + doWait{time: txArriveTimeout, step: true}, + // Push the clock past the request deadline; the timeout handler + // should fire and record a single timeout-valued sample. + doWait{time: txFetchTimeout, step: true}, + doFunc(func() { + samples := rec.snapshot() + if len(samples) != 1 { + t.Fatalf("expected 1 timeout sample, got %d (%v)", len(samples), samples) + } + if samples[0].peer != "A" { + t.Errorf("peer mismatch: got %q, want A", samples[0].peer) + } + if samples[0].latency != txFetchTimeout { + t.Errorf("latency mismatch: got %v, want %v", samples[0].latency, txFetchTimeout) + } + }), + // A late reply from the slow peer must not produce a second sample. + doTxEnqueue{peer: "A", txs: []*types.Transaction{testTxs[0]}, direct: true}, + doFunc(func() { + samples := rec.snapshot() + if len(samples) != 1 { + t.Fatalf("late delivery double-counted latency: got %d samples, want 1", len(samples)) + } + }), + }, + }) +} diff --git a/eth/handler.go b/eth/handler.go index d809053533..6e6b0bb8ac 100644 --- a/eth/handler.go +++ b/eth/handler.go @@ -191,7 +191,7 @@ func newHandler(config *handlerConfig) (*handler, error) { return nil } h.txTracker = txtracker.New() - h.txFetcher = fetcher.NewTxFetcher(h.chain, validateMeta, addTxs, fetchTx, h.removePeer, h.txTracker.NotifyAccepted) + h.txFetcher = fetcher.NewTxFetcher(h.chain, validateMeta, addTxs, fetchTx, h.removePeer, h.txTracker.NotifyAccepted, nil) return h, nil } diff --git a/tests/fuzzers/txfetcher/txfetcher_fuzzer.go b/tests/fuzzers/txfetcher/txfetcher_fuzzer.go index 69674d0e62..10505af4af 100644 --- a/tests/fuzzers/txfetcher/txfetcher_fuzzer.go +++ b/tests/fuzzers/txfetcher/txfetcher_fuzzer.go @@ -84,7 +84,7 @@ func fuzz(input []byte) int { return make([]error, len(txs)) }, func(string, []common.Hash) error { return nil }, - nil, nil, + nil, nil, nil, clock, func() time.Time { nanoTime := int64(clock.Now()) From 275020f988eb0b93875c578a9f7235397af65aac Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Mon, 13 Apr 2026 21:06:17 +0200 Subject: [PATCH 32/38] eth: wire txTracker.NotifyRequestLatency into tx fetcher Plugs the tx fetcher's new onRequestLatency callback into the txtracker's per-peer latency EMA. Tx-request round-trip and timeout samples now flow into Tracker state and become available to the dropper as a per-peer PeerInclusionStats.RequestLatencyEMA signal. --- eth/handler.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eth/handler.go b/eth/handler.go index 6e6b0bb8ac..d7d02295f4 100644 --- a/eth/handler.go +++ b/eth/handler.go @@ -191,7 +191,7 @@ func newHandler(config *handlerConfig) (*handler, error) { return nil } h.txTracker = txtracker.New() - h.txFetcher = fetcher.NewTxFetcher(h.chain, validateMeta, addTxs, fetchTx, h.removePeer, h.txTracker.NotifyAccepted, nil) + h.txFetcher = fetcher.NewTxFetcher(h.chain, validateMeta, addTxs, fetchTx, h.removePeer, h.txTracker.NotifyAccepted, h.txTracker.NotifyRequestLatency) return h, nil } From c82be6827ff508192a388f348ca700677f0b596e Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Mon, 13 Apr 2026 21:08:59 +0200 Subject: [PATCH 33/38] eth: add request-latency peer protection category MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a third protection category to the dropper, scoring peers by per-peer tx-request response latency. Fast peers are harder to drop; peers that chronically time out (their EMA drifts toward the 5s timeout sample) score low and are normal drop candidates. PeerInclusionStats gains RequestLatencyEMA (time.Duration) and RequestSamples (int64). The stats adapter in backend.go copies them from txtracker.PeerStats. The scoring function returns 1/EMA once the peer has >= MinLatencySamples (10) recorded samples — an under-sampled peer scores 0 and is filtered by the existing "score <= 0" rule, preventing a single lucky-fast reply from displacing established peers. Adds three unit tests via protectedPeersByPool for the basic top-N selection, the bootstrap guard, and per-pool independence. --- eth/dropper.go | 15 +++++++ eth/dropper_test.go | 106 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+) diff --git a/eth/dropper.go b/eth/dropper.go index 3855439ca5..7b04656b09 100644 --- a/eth/dropper.go +++ b/eth/dropper.go @@ -75,6 +75,21 @@ type protectionCategory struct { var protectionCategories = []protectionCategory{ {"recent-finalized", func(s txtracker.PeerStats) float64 { return s.RecentFinalized }, inclusionProtectionFrac}, {"recent-included", func(s txtracker.PeerStats) float64 { return s.RecentIncluded }, inclusionProtectionFrac}, + {"request-latency", func(s txtracker.PeerStats) float64 { + // Low-latency peers should rank higher. Peers with too few samples + // score 0 so the existing `score <= 0` filter excludes them — this + // prevents a single lucky-fast reply from winning protection. Peers + // whose EMA reaches the timeout also score 0 by this path because + // the reciprocal of a very large duration is tiny but positive; the + // per-pool top-N will still push faster peers ahead of them. + if s.RequestSamples < txtracker.MinLatencySamples { + return 0 + } + if s.RequestLatencyEMA <= 0 { + return 0 + } + return 1.0 / float64(s.RequestLatencyEMA) + }, inclusionProtectionFrac}, } // dropper monitors the state of the peer pool and introduces churn by diff --git a/eth/dropper_test.go b/eth/dropper_test.go index fd2ed9b611..cd414925ce 100644 --- a/eth/dropper_test.go +++ b/eth/dropper_test.go @@ -19,6 +19,7 @@ package eth import ( "fmt" "testing" + "time" "github.com/ethereum/go-ethereum/eth/txtracker" "github.com/ethereum/go-ethereum/p2p" @@ -232,3 +233,108 @@ func TestProtectedByPoolPerPoolIndependence(t *testing.T) { t.Fatalf("expected 4 protected peers (top-2 of each pool), got %d", len(protected)) } } + +// TestProtectedByPoolRequestLatencyBasic verifies the latency protection +// category: with no competing inclusion stats, the lowest-latency peers +// (among those with enough samples) win top-N protection. +func TestProtectedByPoolRequestLatencyBasic(t *testing.T) { + dialed := makePeers(20) // frac=0.1 → n=2 per category + stats := make(map[string]txtracker.PeerStats) + // Three peers have enough samples; the two fastest should win. + stats[dialed[0].ID().String()] = txtracker.PeerStats{ + RequestLatencyEMA: 50 * time.Millisecond, + RequestSamples: 50, + } + stats[dialed[1].ID().String()] = txtracker.PeerStats{ + RequestLatencyEMA: 100 * time.Millisecond, + RequestSamples: 50, + } + stats[dialed[2].ID().String()] = txtracker.PeerStats{ + RequestLatencyEMA: 2 * time.Second, + RequestSamples: 50, + } + + protected := protectedPeersByPool(nil, dialed, stats) + + if !protected[dialed[0]] { + t.Error("fastest peer should be protected") + } + if !protected[dialed[1]] { + t.Error("second-fastest peer should be protected") + } + if protected[dialed[2]] { + t.Error("slowest peer should not be in top-2") + } + if len(protected) != 2 { + t.Fatalf("expected top-2 latency protection, got %d", len(protected)) + } +} + +// TestProtectedByPoolRequestLatencyBootstrapGuard verifies that peers with +// fewer than MinLatencySamples do not earn latency-based protection, even +// if their few samples indicate very low latency. +func TestProtectedByPoolRequestLatencyBootstrapGuard(t *testing.T) { + dialed := makePeers(20) + stats := make(map[string]txtracker.PeerStats) + // A lucky-fast peer with only 1 sample — must NOT be protected. + stats[dialed[0].ID().String()] = txtracker.PeerStats{ + RequestLatencyEMA: 1 * time.Millisecond, + RequestSamples: 1, + } + // A warmed-up but slower peer — should be protected on latency. + stats[dialed[1].ID().String()] = txtracker.PeerStats{ + RequestLatencyEMA: 500 * time.Millisecond, + RequestSamples: txtracker.MinLatencySamples, + } + + protected := protectedPeersByPool(nil, dialed, stats) + + if protected[dialed[0]] { + t.Error("under-sampled peer should not be protected (bootstrap guard)") + } + if !protected[dialed[1]] { + t.Error("warmed-up peer should be protected") + } +} + +// TestProtectedByPoolRequestLatencyPerPool verifies that the latency +// category selects top-N per pool independently, consistent with the +// other categories. An inbound peer with lower latency does not prevent +// a dialed peer from being protected as top of the dialed pool. +func TestProtectedByPoolRequestLatencyPerPool(t *testing.T) { + inbound := makePeers(20) + dialed := make([]*p2p.Peer, 20) + for i := range dialed { + id := enode.ID{byte(100 + i)} + dialed[i] = p2p.NewPeer(id, fmt.Sprintf("dialed%d", i), nil) + } + stats := make(map[string]txtracker.PeerStats) + // All inbound peers are very fast (50ms). + for _, p := range inbound { + stats[p.ID().String()] = txtracker.PeerStats{ + RequestLatencyEMA: 50 * time.Millisecond, + RequestSamples: 50, + } + } + // Dialed peers are slower (1s) — globally they would all lose, but + // per-pool top-N should still protect two of them. + for _, p := range dialed { + stats[p.ID().String()] = txtracker.PeerStats{ + RequestLatencyEMA: 1 * time.Second, + RequestSamples: 50, + } + } + + protected := protectedPeersByPool(inbound, dialed, stats) + + // 2 from inbound + 2 from dialed = 4. + var dialedProtected int + for _, p := range dialed { + if protected[p] { + dialedProtected++ + } + } + if dialedProtected != 2 { + t.Fatalf("expected 2 dialed peers protected by per-pool top-N, got %d", dialedProtected) + } +} From 06c5ce83729ee42543a7057db0f2e3c33f1fda21 Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Wed, 15 Apr 2026 12:29:04 +0200 Subject: [PATCH 34/38] eth/peerstats: split peer quality aggregation out of txtracker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces a new eth/peerstats package as the single home for per-peer quality metrics consumed by the dropper. txtracker shrinks to a pure tx-lifecycle role: it maps tx hash to deliverer, subscribes to chain heads, computes per-block per-peer inclusion and finalization deltas, and emits them to a StatsConsumer. peerstats owns the aggregates: inclusion EMA, finalized counter, latency EMA, sample counter, and the MinLatencySamples bootstrap constant the dropper uses to filter under-sampled peers. It's a plain struct with a mutex — no goroutine of its own, no lifecycle management. The fetcher's onRequestLatency callback now flows to peerStats.NotifyRequestLatency, the handler's unregisterPeer cleans up via peerStats.NotifyPeerDrop, and the dropper reads its snapshot via peerStats.GetAllPeerStats. txtracker.handleChainHead computes deltas under its own lock, then releases the lock before calling the consumer, which avoids any cross-package lock ordering. Tests are split along the same line: tracker tests use a mock consumer to assert what signals are emitted, peerstats tests cover EMA math and aggregation semantics directly. --- eth/backend.go | 8 +- eth/dropper.go | 16 +- eth/dropper_test.go | 70 ++--- eth/handler.go | 7 +- eth/peerstats/peerstats.go | 172 ++++++++++++ eth/peerstats/peerstats_test.go | 223 +++++++++++++++ eth/txtracker/tracker.go | 182 ++++-------- eth/txtracker/tracker_test.go | 472 ++++++++++---------------------- 8 files changed, 639 insertions(+), 511 deletions(-) create mode 100644 eth/peerstats/peerstats.go create mode 100644 eth/peerstats/peerstats_test.go diff --git a/eth/backend.go b/eth/backend.go index 551506f6f3..1ca0989565 100644 --- a/eth/backend.go +++ b/eth/backend.go @@ -459,11 +459,13 @@ func (s *Ethereum) Start() error { // Start the networking layer s.handler.Start(s.p2pServer.MaxPeers) - // Start the transaction tracker (records tx deliveries, credits peer inclusions). - s.handler.txTracker.Start(s.blockchain) + // Start the transaction tracker; it emits per-block inclusion and + // finalization signals to peerStats, which the dropper queries for + // protection decisions. + s.handler.txTracker.Start(s.blockchain, s.handler.peerStats) // Start the connection manager with inclusion-based peer protection. - s.dropper.Start(s.p2pServer, func() bool { return !s.Synced() }, s.handler.txTracker.GetAllPeerStats) + s.dropper.Start(s.p2pServer, func() bool { return !s.Synced() }, s.handler.peerStats.GetAllPeerStats) // start log indexer s.filterMaps.Start() diff --git a/eth/dropper.go b/eth/dropper.go index 7b04656b09..0af4b97047 100644 --- a/eth/dropper.go +++ b/eth/dropper.go @@ -25,7 +25,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common/mclock" - "github.com/ethereum/go-ethereum/eth/txtracker" + "github.com/ethereum/go-ethereum/eth/peerstats" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/p2p" @@ -60,29 +60,29 @@ var ( ) // Callback type to get per-peer inclusion statistics. -type getPeerStatsFunc func() map[string]txtracker.PeerStats +type getPeerStatsFunc func() map[string]peerstats.PeerStats // protectionCategory defines a peer scoring function and the fraction of peers // to protect per inbound/dialed category. Multiple categories are unioned. type protectionCategory struct { name string - score func(txtracker.PeerStats) float64 + score func(peerstats.PeerStats) float64 frac float64 // fraction of max peers to protect (0.0–1.0) } // protectionCategories is the list of protection criteria. Each category // independently selects its top-N peers per pool; the union is protected. var protectionCategories = []protectionCategory{ - {"recent-finalized", func(s txtracker.PeerStats) float64 { return s.RecentFinalized }, inclusionProtectionFrac}, - {"recent-included", func(s txtracker.PeerStats) float64 { return s.RecentIncluded }, inclusionProtectionFrac}, - {"request-latency", func(s txtracker.PeerStats) float64 { + {"recent-finalized", func(s peerstats.PeerStats) float64 { return s.RecentFinalized }, inclusionProtectionFrac}, + {"recent-included", func(s peerstats.PeerStats) float64 { return s.RecentIncluded }, inclusionProtectionFrac}, + {"request-latency", func(s peerstats.PeerStats) float64 { // Low-latency peers should rank higher. Peers with too few samples // score 0 so the existing `score <= 0` filter excludes them — this // prevents a single lucky-fast reply from winning protection. Peers // whose EMA reaches the timeout also score 0 by this path because // the reciprocal of a very large duration is tiny but positive; the // per-pool top-N will still push faster peers ahead of them. - if s.RequestSamples < txtracker.MinLatencySamples { + if s.RequestSamples < peerstats.MinLatencySamples { return 0 } if s.RequestLatencyEMA <= 0 { @@ -244,7 +244,7 @@ func (cm *dropper) protectedPeers(peers []*p2p.Peer) map[*p2p.Peer]bool { // Factored from protectedPeers so tests can exercise the per-pool // selection logic without needing to construct direction-flagged // *p2p.Peer instances (which require unexported p2p types). -func protectedPeersByPool(inbound, dialed []*p2p.Peer, stats map[string]txtracker.PeerStats) map[*p2p.Peer]bool { +func protectedPeersByPool(inbound, dialed []*p2p.Peer, stats map[string]peerstats.PeerStats) map[*p2p.Peer]bool { result := make(map[*p2p.Peer]bool) // protectPool selects the top-frac peers from pool by score and adds them to result. protectPool := func(pool []*p2p.Peer, score func(*p2p.Peer) float64, frac float64) { diff --git a/eth/dropper_test.go b/eth/dropper_test.go index cd414925ce..e2ed638322 100644 --- a/eth/dropper_test.go +++ b/eth/dropper_test.go @@ -21,7 +21,7 @@ import ( "testing" "time" - "github.com/ethereum/go-ethereum/eth/txtracker" + "github.com/ethereum/go-ethereum/eth/peerstats" "github.com/ethereum/go-ethereum/p2p" "github.com/ethereum/go-ethereum/p2p/enode" ) @@ -37,7 +37,7 @@ func makePeers(n int) []*p2p.Peer { func TestProtectedPeersNoStats(t *testing.T) { cm := &dropper{maxDialPeers: 20, maxInboundPeers: 30} - cm.peerStatsFunc = func() map[string]txtracker.PeerStats { return nil } + cm.peerStatsFunc = func() map[string]peerstats.PeerStats { return nil } peers := makePeers(10) protected := cm.protectedPeers(peers) @@ -48,8 +48,8 @@ func TestProtectedPeersNoStats(t *testing.T) { func TestProtectedPeersEmptyStats(t *testing.T) { cm := &dropper{maxDialPeers: 20, maxInboundPeers: 30} - cm.peerStatsFunc = func() map[string]txtracker.PeerStats { - return map[string]txtracker.PeerStats{} + cm.peerStatsFunc = func() map[string]peerstats.PeerStats { + return map[string]peerstats.PeerStats{} } peers := makePeers(10) @@ -64,11 +64,11 @@ func TestProtectedPeersTopPeer(t *testing.T) { cm := &dropper{maxDialPeers: 20, maxInboundPeers: 30} peers := makePeers(20) - stats := make(map[string]txtracker.PeerStats) - stats[peers[0].ID().String()] = txtracker.PeerStats{RecentFinalized: 100} - stats[peers[1].ID().String()] = txtracker.PeerStats{RecentIncluded: 5.0} + stats := make(map[string]peerstats.PeerStats) + stats[peers[0].ID().String()] = peerstats.PeerStats{RecentFinalized: 100} + stats[peers[1].ID().String()] = peerstats.PeerStats{RecentIncluded: 5.0} - cm.peerStatsFunc = func() map[string]txtracker.PeerStats { return stats } + cm.peerStatsFunc = func() map[string]peerstats.PeerStats { return stats } protected := cm.protectedPeers(peers) if len(protected) != 2 { @@ -86,11 +86,11 @@ func TestProtectedPeersZeroScore(t *testing.T) { cm := &dropper{maxDialPeers: 20, maxInboundPeers: 30} peers := makePeers(10) - stats := make(map[string]txtracker.PeerStats) + stats := make(map[string]peerstats.PeerStats) for _, p := range peers { - stats[p.ID().String()] = txtracker.PeerStats{} + stats[p.ID().String()] = peerstats.PeerStats{} } - cm.peerStatsFunc = func() map[string]txtracker.PeerStats { return stats } + cm.peerStatsFunc = func() map[string]peerstats.PeerStats { return stats } protected := cm.protectedPeers(peers) if len(protected) != 0 { @@ -103,10 +103,10 @@ func TestProtectedPeersOverlap(t *testing.T) { cm := &dropper{maxDialPeers: 20, maxInboundPeers: 30} peers := makePeers(20) - stats := make(map[string]txtracker.PeerStats) - stats[peers[0].ID().String()] = txtracker.PeerStats{RecentFinalized: 100, RecentIncluded: 5.0} + stats := make(map[string]peerstats.PeerStats) + stats[peers[0].ID().String()] = peerstats.PeerStats{RecentFinalized: 100, RecentIncluded: 5.0} - cm.peerStatsFunc = func() map[string]txtracker.PeerStats { return stats } + cm.peerStatsFunc = func() map[string]peerstats.PeerStats { return stats } protected := cm.protectedPeers(peers) if len(protected) != 1 { @@ -139,12 +139,12 @@ func TestProtectedByPoolPerPoolTopN(t *testing.T) { dialed[i] = p2p.NewPeer(id, fmt.Sprintf("dialed%d", i), nil) } // Strictly increasing scores: highest wins in each pool. - stats := make(map[string]txtracker.PeerStats) + stats := make(map[string]peerstats.PeerStats) for i, p := range inbound { - stats[p.ID().String()] = txtracker.PeerStats{RecentFinalized: float64(1 + i)} + stats[p.ID().String()] = peerstats.PeerStats{RecentFinalized: float64(1 + i)} } for i, p := range dialed { - stats[p.ID().String()] = txtracker.PeerStats{RecentFinalized: float64(1 + i)} + stats[p.ID().String()] = peerstats.PeerStats{RecentFinalized: float64(1 + i)} } protected := protectedPeersByPool(inbound, dialed, stats) @@ -174,10 +174,10 @@ func TestProtectedByPoolCrossCategoryOverlap(t *testing.T) { // RecentFinalized winners: P2 (tie-broken-ok), P0 // RecentIncluded winners: P2, P1 // Union: {P0, P1, P2}. - stats := make(map[string]txtracker.PeerStats) - stats[dialed[0].ID().String()] = txtracker.PeerStats{RecentFinalized: 100, RecentIncluded: 0} - stats[dialed[1].ID().String()] = txtracker.PeerStats{RecentFinalized: 0, RecentIncluded: 5.0} - stats[dialed[2].ID().String()] = txtracker.PeerStats{RecentFinalized: 200, RecentIncluded: 10.0} + stats := make(map[string]peerstats.PeerStats) + stats[dialed[0].ID().String()] = peerstats.PeerStats{RecentFinalized: 100, RecentIncluded: 0} + stats[dialed[1].ID().String()] = peerstats.PeerStats{RecentFinalized: 0, RecentIncluded: 5.0} + stats[dialed[2].ID().String()] = peerstats.PeerStats{RecentFinalized: 200, RecentIncluded: 10.0} protected := protectedPeersByPool(nil, dialed, stats) @@ -204,13 +204,13 @@ func TestProtectedByPoolPerPoolIndependence(t *testing.T) { id := enode.ID{byte(100 + i)} dialed[i] = p2p.NewPeer(id, fmt.Sprintf("dialed%d", i), nil) } - stats := make(map[string]txtracker.PeerStats) + stats := make(map[string]peerstats.PeerStats) // Every inbound peer outscores every dialed peer. for i, p := range inbound { - stats[p.ID().String()] = txtracker.PeerStats{RecentFinalized: float64(1000 + i)} + stats[p.ID().String()] = peerstats.PeerStats{RecentFinalized: float64(1000 + i)} } for i, p := range dialed { - stats[p.ID().String()] = txtracker.PeerStats{RecentFinalized: float64(1 + i)} + stats[p.ID().String()] = peerstats.PeerStats{RecentFinalized: float64(1 + i)} } protected := protectedPeersByPool(inbound, dialed, stats) @@ -239,17 +239,17 @@ func TestProtectedByPoolPerPoolIndependence(t *testing.T) { // (among those with enough samples) win top-N protection. func TestProtectedByPoolRequestLatencyBasic(t *testing.T) { dialed := makePeers(20) // frac=0.1 → n=2 per category - stats := make(map[string]txtracker.PeerStats) + stats := make(map[string]peerstats.PeerStats) // Three peers have enough samples; the two fastest should win. - stats[dialed[0].ID().String()] = txtracker.PeerStats{ + stats[dialed[0].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 50 * time.Millisecond, RequestSamples: 50, } - stats[dialed[1].ID().String()] = txtracker.PeerStats{ + stats[dialed[1].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 100 * time.Millisecond, RequestSamples: 50, } - stats[dialed[2].ID().String()] = txtracker.PeerStats{ + stats[dialed[2].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 2 * time.Second, RequestSamples: 50, } @@ -275,16 +275,16 @@ func TestProtectedByPoolRequestLatencyBasic(t *testing.T) { // if their few samples indicate very low latency. func TestProtectedByPoolRequestLatencyBootstrapGuard(t *testing.T) { dialed := makePeers(20) - stats := make(map[string]txtracker.PeerStats) + stats := make(map[string]peerstats.PeerStats) // A lucky-fast peer with only 1 sample — must NOT be protected. - stats[dialed[0].ID().String()] = txtracker.PeerStats{ + stats[dialed[0].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 1 * time.Millisecond, RequestSamples: 1, } // A warmed-up but slower peer — should be protected on latency. - stats[dialed[1].ID().String()] = txtracker.PeerStats{ + stats[dialed[1].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 500 * time.Millisecond, - RequestSamples: txtracker.MinLatencySamples, + RequestSamples: peerstats.MinLatencySamples, } protected := protectedPeersByPool(nil, dialed, stats) @@ -308,10 +308,10 @@ func TestProtectedByPoolRequestLatencyPerPool(t *testing.T) { id := enode.ID{byte(100 + i)} dialed[i] = p2p.NewPeer(id, fmt.Sprintf("dialed%d", i), nil) } - stats := make(map[string]txtracker.PeerStats) + stats := make(map[string]peerstats.PeerStats) // All inbound peers are very fast (50ms). for _, p := range inbound { - stats[p.ID().String()] = txtracker.PeerStats{ + stats[p.ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 50 * time.Millisecond, RequestSamples: 50, } @@ -319,7 +319,7 @@ func TestProtectedByPoolRequestLatencyPerPool(t *testing.T) { // Dialed peers are slower (1s) — globally they would all lose, but // per-pool top-N should still protect two of them. for _, p := range dialed { - stats[p.ID().String()] = txtracker.PeerStats{ + stats[p.ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 1 * time.Second, RequestSamples: 50, } diff --git a/eth/handler.go b/eth/handler.go index d7d02295f4..8916dad662 100644 --- a/eth/handler.go +++ b/eth/handler.go @@ -38,6 +38,7 @@ import ( "github.com/ethereum/go-ethereum/eth/fetcher" "github.com/ethereum/go-ethereum/eth/protocols/eth" "github.com/ethereum/go-ethereum/eth/protocols/snap" + "github.com/ethereum/go-ethereum/eth/peerstats" "github.com/ethereum/go-ethereum/eth/txtracker" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/event" @@ -125,6 +126,7 @@ type handler struct { downloader *downloader.Downloader txFetcher *fetcher.TxFetcher txTracker *txtracker.Tracker + peerStats *peerstats.Stats peers *peerSet txBroadcastKey [16]byte @@ -191,7 +193,8 @@ func newHandler(config *handlerConfig) (*handler, error) { return nil } h.txTracker = txtracker.New() - h.txFetcher = fetcher.NewTxFetcher(h.chain, validateMeta, addTxs, fetchTx, h.removePeer, h.txTracker.NotifyAccepted, h.txTracker.NotifyRequestLatency) + h.peerStats = peerstats.New() + h.txFetcher = fetcher.NewTxFetcher(h.chain, validateMeta, addTxs, fetchTx, h.removePeer, h.txTracker.NotifyAccepted, h.peerStats.NotifyRequestLatency) return h, nil } @@ -406,7 +409,7 @@ func (h *handler) unregisterPeer(id string) { } h.downloader.UnregisterPeer(id) h.txFetcher.Drop(id) - h.txTracker.NotifyPeerDrop(id) + h.peerStats.NotifyPeerDrop(id) if err := h.peers.unregisterPeer(id); err != nil { logger.Error("Ethereum peer removal failed", "err", err) diff --git a/eth/peerstats/peerstats.go b/eth/peerstats/peerstats.go new file mode 100644 index 0000000000..567a27309a --- /dev/null +++ b/eth/peerstats/peerstats.go @@ -0,0 +1,172 @@ +// Copyright 2026 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +// Package peerstats maintains per-peer quality metrics used by the peer +// dropper to protect high-value peers from random disconnection. +// +// The package is a passive accumulator: it exposes entry points for its +// signal producers (txtracker for inclusion/finalization, the tx fetcher +// for latency, the handler for peer-drop cleanup) and a read-only +// snapshot for its consumer (the dropper). It has no goroutine of its +// own — all mutation is serialized by a single mutex. +// +// Signal sources: +// - NotifyBlock(inclusions, finalized) — per-block deltas from txtracker +// (computed under txtracker's own lock, then passed in after release) +// - NotifyRequestLatency(peer, latency) — per-request samples from the +// fetcher; timeouts are reported with the timeout value so slow peers +// contribute to the EMA +// - NotifyPeerDrop(peer) — called from the handler on disconnect +package peerstats + +import ( + "sync" + "time" +) + +const ( + // EMA smoothing factor for per-block inclusion rate. + emaAlpha = 0.05 + // EMA smoothing factor for per-block finalization rate. Very slow on + // purpose: finalization is permanent, and the score should reflect + // sustained contribution over long windows, not recent bursts. + // Half-life ≈ 6930 chain heads (~23 hours on 12s blocks). + finalizedEMAAlpha = 0.0001 + // EMA smoothing factor for per-request latency average. Slow on purpose: + // short bursts shouldn't shift the score, sustained behavior should. + // Half-life ≈ ln(0.5)/ln(0.99) ≈ 69 samples. + latencyEMAAlpha = 0.01 + // MinLatencySamples is the number of latency samples a peer must accumulate + // before its RequestLatencyEMA is considered meaningful for protection. + // Prevents a single lucky-fast reply from displacing established peers. + MinLatencySamples = 10 +) + +// PeerStats is the exported per-peer snapshot returned by GetAllPeerStats. +type PeerStats struct { + RecentFinalized float64 // EMA of per-block finalization credits (slow) + RecentIncluded float64 // EMA of per-block inclusions (fast) + RequestLatencyEMA time.Duration // Slow EMA of tx-request response latency (timeouts count as the timeout value) + RequestSamples int64 // Number of latency samples seen (for bootstrap guard) +} + +// peerStats is the internal mutable state per peer. +type peerStats struct { + recentFinalized float64 + recentIncluded float64 + requestLatencyEMA time.Duration + requestSamples int64 +} + +// Stats is the per-peer quality aggregator. +type Stats struct { + mu sync.Mutex + peers map[string]*peerStats +} + +// New creates an empty Stats. +func New() *Stats { + return &Stats{peers: make(map[string]*peerStats)} +} + +// NotifyBlock ingests a per-block update. `inclusions` is the count of the head +// block's transactions attributed to each peer; peers with a positive +// count get a stats entry created if one doesn't exist (this is how +// peerstats learns about newly-active peers). Peers not in the map but +// already tracked have their EMA decay with a zero sample. +// +// `finalized` is per-peer credits accumulated since the last NotifyBlock; +// credits are only applied to peers already tracked — we don't resurrect +// dropped peers from historical finalization data. +// +// NotifyBlock must NOT be called while the caller holds any other lock that +// could be acquired by peerstats callers in reverse order. Current callers +// (txtracker.handleChainHead) release their lock before invoking NotifyBlock. +func (s *Stats) NotifyBlock(inclusions, finalized map[string]int) { + s.mu.Lock() + defer s.mu.Unlock() + + // Ensure a stats entry exists for any peer that just had an inclusion. + // This is the primary path by which peerstats learns about a peer's + // inclusion activity. + for peer, count := range inclusions { + if count > 0 && s.peers[peer] == nil { + s.peers[peer] = &peerStats{} + } + } + // Update inclusion and finalization EMAs for every tracked peer. A + // peer not present in the respective delta map gets a 0 contribution + // — pure decay. Finalization credits for peers no longer tracked are + // ignored (don't resurrect dropped peers from historical data). + for peer, ps := range s.peers { + ps.recentIncluded = (1-emaAlpha)*ps.recentIncluded + emaAlpha*float64(inclusions[peer]) + ps.recentFinalized = (1-finalizedEMAAlpha)*ps.recentFinalized + finalizedEMAAlpha*float64(finalized[peer]) + } +} + +// NotifyRequestLatency records a tx-request response latency sample for +// the given peer. Timeouts should be reported as the timeout value. +// Creates a peer entry if one doesn't exist (a peer may have latency +// samples before any inclusion signal). +func (s *Stats) NotifyRequestLatency(peer string, latency time.Duration) { + s.mu.Lock() + defer s.mu.Unlock() + + ps := s.peers[peer] + if ps == nil { + ps = &peerStats{} + s.peers[peer] = ps + } + if ps.requestSamples == 0 { + // Bootstrap the EMA with the first sample so it doesn't drift up + // from zero over many samples before reaching realistic values. + ps.requestLatencyEMA = latency + } else { + ps.requestLatencyEMA = time.Duration( + float64(ps.requestLatencyEMA)*(1-latencyEMAAlpha) + + float64(latency)*latencyEMAAlpha, + ) + } + ps.requestSamples++ +} + +// NotifyPeerDrop removes a peer's stats on disconnect. A rare stale +// latency sample racing with the drop may recreate the peer entry with +// one sample; that entry can never earn protection (MinLatencySamples +// guard) and is harmless. +func (s *Stats) NotifyPeerDrop(peer string) { + s.mu.Lock() + defer s.mu.Unlock() + delete(s.peers, peer) +} + +// GetAllPeerStats returns a snapshot of per-peer stats. Called by the +// dropper every few minutes; allocation cost is negligible at that rate. +func (s *Stats) GetAllPeerStats() map[string]PeerStats { + s.mu.Lock() + defer s.mu.Unlock() + + result := make(map[string]PeerStats, len(s.peers)) + for id, ps := range s.peers { + result[id] = PeerStats{ + RecentFinalized: ps.recentFinalized, + RecentIncluded: ps.recentIncluded, + RequestLatencyEMA: ps.requestLatencyEMA, + RequestSamples: ps.requestSamples, + } + } + return result +} diff --git a/eth/peerstats/peerstats_test.go b/eth/peerstats/peerstats_test.go new file mode 100644 index 0000000000..42d3bfa385 --- /dev/null +++ b/eth/peerstats/peerstats_test.go @@ -0,0 +1,223 @@ +// Copyright 2026 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package peerstats + +import ( + "testing" + "time" +) + +// TestNotifyBlockBootstrapsFromInclusions verifies that a peer with a positive +// inclusion count in the first NotifyBlock gets a stats entry created. +func TestNotifyBlockBootstrapsFromInclusions(t *testing.T) { + s := New() + s.NotifyBlock(map[string]int{"peerA": 3}, nil) + + stats := s.GetAllPeerStats() + if len(stats) != 1 { + t.Fatalf("expected 1 peer entry, got %d", len(stats)) + } + ps, ok := stats["peerA"] + if !ok { + t.Fatal("expected peerA entry") + } + // EMA after first block: (1-0.05)*0 + 0.05*3 = 0.15 + if ps.RecentIncluded <= 0 { + t.Fatalf("expected RecentIncluded > 0 after inclusion, got %f", ps.RecentIncluded) + } +} + +// TestNotifyBlockDecaysKnownPeers verifies that peers already tracked get their +// RecentIncluded EMA decayed when they have no inclusions in a block. +func TestNotifyBlockDecaysKnownPeers(t *testing.T) { + s := New() + // Seed peerA with an inclusion. + s.NotifyBlock(map[string]int{"peerA": 3}, nil) + initial := s.GetAllPeerStats()["peerA"].RecentIncluded + + // Empty block — peerA should decay. + s.NotifyBlock(nil, nil) + after := s.GetAllPeerStats()["peerA"].RecentIncluded + + if after >= initial { + t.Fatalf("expected decay, got %f >= %f", after, initial) + } +} + +// TestNotifyBlockDoesNotResurrectDroppedPeers verifies that finalization +// credits to a peer with no entry don't create one. +func TestNotifyBlockDoesNotResurrectFromFinalization(t *testing.T) { + s := New() + s.NotifyBlock(nil, map[string]int{"peerA": 5}) + + if stats := s.GetAllPeerStats(); len(stats) != 0 { + t.Fatalf("finalization credits must not create entries, got %d peers", len(stats)) + } +} + +// TestNotifyBlockDropThenFinalizeNoResurrect verifies the full drop→finalize +// sequence: a dropped peer doesn't come back via finalization credits. +func TestNotifyBlockDropThenFinalizeNoResurrect(t *testing.T) { + s := New() + s.NotifyBlock(map[string]int{"peerA": 1}, nil) + s.NotifyPeerDrop("peerA") + s.NotifyBlock(nil, map[string]int{"peerA": 10}) + + if stats := s.GetAllPeerStats(); len(stats) != 0 { + t.Fatalf("dropped peer must not be resurrected, got %d peers", len(stats)) + } +} + +// TestNotifyBlockFinalizationCredits an existing peer. +func TestNotifyBlockFinalizationCredits(t *testing.T) { + s := New() + s.NotifyBlock(map[string]int{"peerA": 1}, nil) + s.NotifyBlock(nil, map[string]int{"peerA": 3}) + + // RecentFinalized is a slow EMA, not a cumulative count: assert it + // moved in the positive direction, not the exact value. + if got := s.GetAllPeerStats()["peerA"].RecentFinalized; got <= 0 { + t.Fatalf("expected RecentFinalized>0 after credits, got %f", got) + } +} + +// TestNotifyBlockInclusionEMAUpdate verifies the EMA formula (1-α)·old + α·count. +func TestNotifyBlockInclusionEMAUpdate(t *testing.T) { + s := New() + // Three inclusions: EMA = 0.05 * 3 = 0.15 + s.NotifyBlock(map[string]int{"peerA": 3}, nil) + got := s.GetAllPeerStats()["peerA"].RecentIncluded + want := 0.15 + if diff := got - want; diff < -1e-9 || diff > 1e-9 { + t.Fatalf("EMA after one sample: got %f, want %f", got, want) + } + // Next block with 10 inclusions: EMA = 0.95*0.15 + 0.05*10 = 0.6425 + s.NotifyBlock(map[string]int{"peerA": 10}, nil) + got = s.GetAllPeerStats()["peerA"].RecentIncluded + want = 0.6425 + if diff := got - want; diff < -1e-9 || diff > 1e-9 { + t.Fatalf("EMA after two samples: got %f, want %f", got, want) + } +} + +// TestNotifyRequestLatencyFirstSampleBootstrap asserts that the first +// latency sample seeds the EMA directly. +func TestNotifyRequestLatencyFirstSampleBootstrap(t *testing.T) { + s := New() + s.NotifyRequestLatency("peerA", 200*time.Millisecond) + + ps := s.GetAllPeerStats()["peerA"] + if ps.RequestLatencyEMA != 200*time.Millisecond { + t.Fatalf("expected first sample to seed EMA at 200ms, got %v", ps.RequestLatencyEMA) + } + if ps.RequestSamples != 1 { + t.Fatalf("expected RequestSamples=1, got %d", ps.RequestSamples) + } +} + +// TestNotifyRequestLatencyEMAUpdate verifies the EMA formula for latency. +func TestNotifyRequestLatencyEMAUpdate(t *testing.T) { + s := New() + s.NotifyRequestLatency("peerA", 100*time.Millisecond) + s.NotifyRequestLatency("peerA", 1000*time.Millisecond) + + // Expected: 0.99*100ms + 0.01*1000ms = 109ms + got := s.GetAllPeerStats()["peerA"].RequestLatencyEMA + want := 109 * time.Millisecond + delta := got - want + if delta < 0 { + delta = -delta + } + if delta > 1*time.Microsecond { + t.Fatalf("EMA mismatch: got %v, want %v", got, want) + } + if samples := s.GetAllPeerStats()["peerA"].RequestSamples; samples != 2 { + t.Fatalf("expected RequestSamples=2, got %d", samples) + } +} + +// TestNotifyRequestLatencySlowConvergence verifies the slow alpha +// damps convergence under sustained timeouts. +func TestNotifyRequestLatencySlowConvergence(t *testing.T) { + s := New() + s.NotifyRequestLatency("peerA", 100*time.Millisecond) + for i := 0; i < 50; i++ { + s.NotifyRequestLatency("peerA", 5*time.Second) + } + got := s.GetAllPeerStats()["peerA"].RequestLatencyEMA + if got < 1*time.Second { + t.Fatalf("EMA did not move enough under sustained timeouts, got %v", got) + } + if got > 3*time.Second { + t.Fatalf("EMA converged too fast for slow alpha=0.01, got %v", got) + } +} + +// TestNotifyPeerDropClearsStats verifies that a dropped peer disappears +// from GetAllPeerStats. +func TestNotifyPeerDropClearsStats(t *testing.T) { + s := New() + s.NotifyRequestLatency("peerA", 200*time.Millisecond) + s.NotifyPeerDrop("peerA") + + if _, ok := s.GetAllPeerStats()["peerA"]; ok { + t.Fatal("peerA stats should be removed after NotifyPeerDrop") + } +} + +// TestStaleRequestLatencyAfterDrop documents the accepted behavior: a +// late sample after NotifyPeerDrop recreates a 1-sample entry. The +// dropper's MinLatencySamples=10 guard ensures this is harmless. +func TestStaleRequestLatencyAfterDrop(t *testing.T) { + s := New() + s.NotifyRequestLatency("peerA", 200*time.Millisecond) + s.NotifyPeerDrop("peerA") + // Late sample racing with the drop. + s.NotifyRequestLatency("peerA", 50*time.Millisecond) + + ps := s.GetAllPeerStats()["peerA"] + if ps.RequestSamples != 1 { + t.Fatalf("expected fresh RequestSamples=1, got %d", ps.RequestSamples) + } + if ps.RequestLatencyEMA != 50*time.Millisecond { + t.Fatalf("expected fresh bootstrap at 50ms, got %v", ps.RequestLatencyEMA) + } + // The dropper's MinLatencySamples guard (in eth/dropper.go) prevents + // this 1-sample entry from earning latency-based protection. +} + +// TestMultiplePeersIsolated verifies per-peer isolation across signal types. +func TestMultiplePeersIsolated(t *testing.T) { + s := New() + s.NotifyBlock(map[string]int{"peerA": 5, "peerB": 0}, nil) + s.NotifyRequestLatency("peerA", 100*time.Millisecond) + s.NotifyRequestLatency("peerB", 5*time.Second) + s.NotifyBlock(nil, map[string]int{"peerA": 2}) + + stats := s.GetAllPeerStats() + // Only peerA receives finalization credits; peerB's EMA stays at zero + // (no credits, pure decay from zero). + if stats["peerA"].RecentFinalized <= 0 || stats["peerB"].RecentFinalized != 0 { + t.Errorf("finalization leaked: A=%f B=%f", stats["peerA"].RecentFinalized, stats["peerB"].RecentFinalized) + } + if stats["peerA"].RequestLatencyEMA != 100*time.Millisecond { + t.Errorf("peerA latency: got %v, want 100ms", stats["peerA"].RequestLatencyEMA) + } + if stats["peerB"].RequestLatencyEMA != 5*time.Second { + t.Errorf("peerB latency: got %v, want 5s", stats["peerB"].RequestLatencyEMA) + } +} diff --git a/eth/txtracker/tracker.go b/eth/txtracker/tracker.go index 8c678c6561..661dda32f6 100644 --- a/eth/txtracker/tracker.go +++ b/eth/txtracker/tracker.go @@ -14,21 +14,18 @@ // You should have received a copy of the GNU Lesser General Public License // along with the go-ethereum library. If not, see . -// Package txtracker provides minimal per-peer transaction inclusion tracking. +// Package txtracker maps accepted transactions to their delivering peer +// and observes chain-head and finalization events to emit per-block +// per-peer signals to a StatsConsumer (typically eth/peerstats). // -// It records which peer delivered each accepted transaction (via NotifyAccepted) -// and monitors the chain for inclusion and finalization events. When a -// delivered transaction is finalized on chain, the delivering peer is -// credited. A per-block exponential moving average (EMA) of inclusions -// tracks recent peer productivity. -// -// The primary consumer is the peer dropper (eth/dropper.go), which uses -// these stats to protect high-value peers from random disconnection. +// The tracker owns the tx-hash → deliverer-peer map with FIFO eviction, +// a chain-head subscription goroutine, and the computation of per-block +// inclusion counts and finalization credits. It does NOT maintain +// per-peer aggregates — that is peerstats' job. package txtracker import ( "sync" - "time" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core" @@ -40,31 +37,8 @@ import ( const ( // Maximum number of tx→deliverer mappings to retain. maxTracked = 262144 - // EMA smoothing factor for per-block inclusion rate. - emaAlpha = 0.05 - // EMA smoothing factor for per-block finalization rate. Very slow on - // purpose: finalization is permanent, and the score should reflect - // sustained contribution over long windows, not recent bursts. - // Half-life ≈ 6930 chain heads (~23 hours on 12s blocks). - finalizedEMAAlpha = 0.0001 - // EMA smoothing factor for per-request latency average. Slow on purpose: - // short bursts shouldn't shift the score, sustained behavior should. - // Half-life ≈ ln(0.5)/ln(0.99) ≈ 69 samples. - latencyEMAAlpha = 0.01 - // MinLatencySamples is the number of latency samples a peer must accumulate - // before its RequestLatencyEMA is considered meaningful for protection. - // Prevents a single lucky-fast reply from displacing established peers. - MinLatencySamples = 10 ) -// PeerStats holds the per-peer inclusion and responsiveness data. -type PeerStats struct { - RecentFinalized float64 // EMA of per-block finalization credits (slow) - RecentIncluded float64 // EMA of per-block inclusions (fast) - RequestLatencyEMA time.Duration // Slow EMA of tx-request response latency (timeouts count as the timeout value) - RequestSamples int64 // Number of latency samples seen for this peer -} - // Chain is the blockchain interface needed by the tracker. type Chain interface { SubscribeChainHeadEvent(ch chan<- core.ChainHeadEvent) event.Subscription @@ -73,22 +47,29 @@ type Chain interface { CurrentFinalBlock() *types.Header } -type peerStats struct { - recentFinalized float64 - recentIncluded float64 - requestLatencyEMA time.Duration - requestSamples int64 +// StatsConsumer receives per-block signals about peer inclusion and +// finalization. The tracker invokes NotifyBlock exactly once per handled chain +// head, AFTER releasing its own lock, with: +// +// - inclusions: per-peer count of transactions in the head block +// - finalized: per-peer count of transactions in blocks that became +// finalized since the previous call (possibly zero-range) +// +// Either map may be empty but the slice/map itself is never nil when +// called. NotifyBlock must not call back into the tracker. +type StatsConsumer interface { + NotifyBlock(inclusions, finalized map[string]int) } -// Tracker records which peer delivered each transaction and credits peers -// when their transactions appear on chain. +// Tracker records which peer delivered each transaction and emits +// per-block inclusion and finalization signals to a StatsConsumer. type Tracker struct { mu sync.Mutex txs map[common.Hash]string // hash → deliverer peer ID - peers map[string]*peerStats - order []common.Hash // insertion order for LRU eviction + order []common.Hash // insertion order for FIFO eviction chain Chain + consumer StatsConsumer lastFinalNum uint64 // last finalized block number processed headCh chan core.ChainHeadEvent sub event.Subscription @@ -101,16 +82,18 @@ type Tracker struct { // New creates a new tracker. func New() *Tracker { return &Tracker{ - txs: make(map[common.Hash]string), - peers: make(map[string]*peerStats), - quit: make(chan struct{}), - step: make(chan struct{}, 1), + txs: make(map[common.Hash]string), + quit: make(chan struct{}), + step: make(chan struct{}, 1), } } -// Start begins listening for chain head events. -func (t *Tracker) Start(chain Chain) { +// Start begins listening for chain head events. `consumer` receives +// per-block signals; if nil, signals are computed but discarded +// (useful in tests that exercise only the tx-lifecycle surface). +func (t *Tracker) Start(chain Chain, consumer StatsConsumer) { t.chain = chain + t.consumer = consumer // Seed lastFinalNum so checkFinalization doesn't backfill from genesis. if fh := chain.CurrentFinalBlock(); fh != nil { t.lastFinalNum = fh.Number.Uint64() @@ -121,14 +104,6 @@ func (t *Tracker) Start(chain Chain) { go t.loop() } -// NotifyPeerDrop removes a disconnected peer's stats to prevent unbounded -// growth. Safe to call from any goroutine. -func (t *Tracker) NotifyPeerDrop(peer string) { - t.mu.Lock() - defer t.mu.Unlock() - delete(t.peers, peer) -} - // Stop shuts down the tracker. func (t *Tracker) Stop() { t.sub.Unsubscribe() @@ -151,10 +126,6 @@ func (t *Tracker) NotifyAccepted(peer string, hashes []common.Hash) { t.txs[hash] = peer t.order = append(t.order, hash) } - // Ensure the delivering peer has a stats entry. - if len(hashes) > 0 && t.peers[peer] == nil { - t.peers[peer] = &peerStats{} - } // Evict oldest entries if over capacity. for len(t.txs) > maxTracked { oldest := t.order[0] @@ -168,51 +139,6 @@ func (t *Tracker) NotifyAccepted(peer string, hashes []common.Hash) { } } -// NotifyRequestLatency records a tx-request response latency sample for the -// given peer. Timeouts should be reported as the timeout value (so they count -// against the EMA rather than being silently omitted). The EMA uses a slow -// alpha so isolated bursts don't shift the score appreciably. -// Safe to call from any goroutine. -func (t *Tracker) NotifyRequestLatency(peer string, latency time.Duration) { - t.mu.Lock() - defer t.mu.Unlock() - - ps := t.peers[peer] - if ps == nil { - ps = &peerStats{} - t.peers[peer] = ps - } - if ps.requestSamples == 0 { - // Bootstrap the EMA with the first sample so it doesn't drift up - // from zero over many samples before reaching realistic values. - ps.requestLatencyEMA = latency - } else { - ps.requestLatencyEMA = time.Duration( - float64(ps.requestLatencyEMA)*(1-latencyEMAAlpha) + - float64(latency)*latencyEMAAlpha, - ) - } - ps.requestSamples++ -} - -// GetAllPeerStats returns a snapshot of per-peer inclusion statistics. -// Safe to call from any goroutine. -func (t *Tracker) GetAllPeerStats() map[string]PeerStats { - t.mu.Lock() - defer t.mu.Unlock() - - result := make(map[string]PeerStats, len(t.peers)) - for id, ps := range t.peers { - result[id] = PeerStats{ - RecentFinalized: ps.recentFinalized, - RecentIncluded: ps.recentIncluded, - RequestLatencyEMA: ps.requestLatencyEMA, - RequestSamples: ps.requestSamples, - } - } - return result -} - func (t *Tracker) loop() { defer t.wg.Done() @@ -232,6 +158,10 @@ func (t *Tracker) loop() { } } +// handleChainHead computes per-peer deltas for the new head block and any +// newly-finalized blocks, then hands them to the StatsConsumer AFTER +// releasing t.mu. The lock-release-before-consumer pattern avoids any +// cross-package lock ordering. func (t *Tracker) handleChainHead(ev core.ChainHeadEvent) { // Fetch the head block by hash (not just number) to avoid using a // reorged block if the tracker goroutine lags behind the chain. @@ -239,35 +169,28 @@ func (t *Tracker) handleChainHead(ev core.ChainHeadEvent) { if block == nil { return } - t.mu.Lock() - defer t.mu.Unlock() - // Count per-peer inclusions in this block for the inclusion EMA. - blockIncl := make(map[string]int) + t.mu.Lock() + // Count per-peer inclusions in the head block. + inclusions := make(map[string]int) for _, tx := range block.Transactions() { if peer := t.txs[tx.Hash()]; peer != "" { - blockIncl[peer]++ + inclusions[peer]++ } } - // Accumulate per-peer finalization credits over the newly-finalized - // range (possibly zero blocks). Only counts peers still tracked. - blockFinal := t.collectFinalizationCredits() + // Compute per-peer finalization credits since the last call. + finalized := t.collectFinalization() + t.mu.Unlock() - // Update both EMAs for all tracked peers (decays inactive ones). - // Don't create entries for unknown peers — they may have been - // removed by NotifyPeerDrop and should not be resurrected. - for peer, ps := range t.peers { - ps.recentIncluded = (1-emaAlpha)*ps.recentIncluded + emaAlpha*float64(blockIncl[peer]) - ps.recentFinalized = (1-finalizedEMAAlpha)*ps.recentFinalized + finalizedEMAAlpha*float64(blockFinal[peer]) + if t.consumer != nil { + t.consumer.NotifyBlock(inclusions, finalized) } } -// collectFinalizationCredits accumulates per-peer finalization credits for -// blocks newly finalized since lastFinalNum. Returns a (possibly empty) map -// keyed by peer ID; advances lastFinalNum. Must be called with t.mu held. -// Peers that have already been removed by NotifyPeerDrop are skipped so -// dropped peers are not resurrected by old on-chain data. -func (t *Tracker) collectFinalizationCredits() map[string]int { +// collectFinalization accumulates per-peer finalization credits for +// blocks newly finalized since lastFinalNum. Returns a (possibly empty) +// map; advances lastFinalNum. Must be called with t.mu held. +func (t *Tracker) collectFinalization() map[string]int { credits := make(map[string]int) finalHeader := t.chain.CurrentFinalBlock() if finalHeader == nil { @@ -283,14 +206,9 @@ func (t *Tracker) collectFinalizationCredits() map[string]int { continue } for _, tx := range block.Transactions() { - peer := t.txs[tx.Hash()] - if peer == "" { - continue + if peer := t.txs[tx.Hash()]; peer != "" { + credits[peer]++ } - if _, ok := t.peers[peer]; !ok { - continue // peer disconnected, skip credit - } - credits[peer]++ } } if total := sumCounts(credits); total > 0 { diff --git a/eth/txtracker/tracker_test.go b/eth/txtracker/tracker_test.go index 280694b273..ad0637eb0a 100644 --- a/eth/txtracker/tracker_test.go +++ b/eth/txtracker/tracker_test.go @@ -79,20 +79,17 @@ func (c *mockChain) CurrentFinalBlock() *types.Header { return &types.Header{Number: new(big.Int).SetUint64(c.finalNum)} } -// addBlock adds a canonical block at the given height. Overwrites any -// prior canonical block at that height. +// addBlock adds a canonical block at the given height. func (c *mockChain) addBlock(num uint64, txs []*types.Transaction) *types.Block { return c.addBlockAtHeight(num, num, txs, true) } // addBlockAtHeight adds a block at the given height. The salt parameter -// ensures distinct block hashes for two blocks at the same height (used -// for reorg tests). If canonical is true, the block becomes the canonical -// block for that height (looked up by GetBlockByNumber). +// ensures distinct block hashes for two blocks at the same height. If +// canonical is true, the block becomes the canonical block for that height. func (c *mockChain) addBlockAtHeight(num, salt uint64, txs []*types.Transaction, canonical bool) *types.Block { c.mu.Lock() defer c.mu.Unlock() - // Mix salt into Extra so siblings at the same height get distinct hashes. header := &types.Header{ Number: new(big.Int).SetUint64(num), Extra: big.NewInt(int64(salt)).Bytes(), @@ -111,9 +108,7 @@ func (c *mockChain) setFinalBlock(num uint64) { c.finalNum = num } -// sendHead emits a chain head event for the canonical block at the given -// height. The emitted header carries the real block's hash so the -// tracker's GetBlock(hash, number) lookup resolves correctly. +// sendHead emits a chain head event for the canonical block at the given height. func (c *mockChain) sendHead(num uint64) { c.mu.Lock() hash := c.canonicalByNum[num] @@ -143,6 +138,49 @@ func makeTx(nonce uint64) *types.Transaction { return types.NewTx(&types.LegacyTx{Nonce: nonce, GasPrice: big.NewInt(1), Gas: 21000}) } +// mockConsumer captures NotifyBlock invocations so tests can assert on the +// signals the tracker emits. +type mockConsumer struct { + mu sync.Mutex + signals []signal +} + +type signal struct { + inclusions, finalized map[string]int +} + +func (c *mockConsumer) NotifyBlock(inclusions, finalized map[string]int) { + c.mu.Lock() + defer c.mu.Unlock() + // Deep-copy so tests inspecting older signals aren't tripped up by + // later iterations mutating the same map (they don't today, but + // this keeps the assertion model simple). + in := make(map[string]int, len(inclusions)) + for k, v := range inclusions { + in[k] = v + } + fn := make(map[string]int, len(finalized)) + for k, v := range finalized { + fn[k] = v + } + c.signals = append(c.signals, signal{in, fn}) +} + +func (c *mockConsumer) last() signal { + c.mu.Lock() + defer c.mu.Unlock() + if len(c.signals) == 0 { + return signal{} + } + return c.signals[len(c.signals)-1] +} + +func (c *mockConsumer) count() int { + c.mu.Lock() + defer c.mu.Unlock() + return len(c.signals) +} + // waitStep blocks until the tracker has processed one event. func waitStep(t *testing.T, tr *Tracker) { t.Helper() @@ -153,33 +191,16 @@ func waitStep(t *testing.T, tr *Tracker) { } } -func TestNotifyReceived(t *testing.T) { +// TestNotifyAcceptedRecordsMapping verifies the tx-lifecycle surface: +// NotifyAccepted records tx→peer mappings in insertion order, with +// first-deliverer-wins semantics on duplicates. +func TestNotifyAcceptedRecordsMapping(t *testing.T) { tr := New() - chain := newMockChain() - tr.Start(chain) - defer tr.Stop() txs := []*types.Transaction{makeTx(1), makeTx(2), makeTx(3)} hashes := hashTxs(txs) tr.NotifyAccepted("peerA", hashes) - // Public surface: peer entry was created with zero stats before any - // chain events. Map lookups would return a zero value for a missing - // key, so assert presence explicitly. - stats := tr.GetAllPeerStats() - if len(stats) != 1 { - t.Fatalf("expected 1 peer entry, got %d", len(stats)) - } - ps, ok := stats["peerA"] - if !ok { - t.Fatal("expected peerA entry, not found") - } - if ps.RecentFinalized != 0 || ps.RecentIncluded != 0 { - t.Fatalf("expected zero stats before chain events, got %+v", ps) - } - - // Internal state: all tx→deliverer mappings recorded, insertion order - // preserved in the FIFO slice. tr.mu.Lock() defer tr.mu.Unlock() if len(tr.txs) != 3 { @@ -198,191 +219,114 @@ func TestNotifyReceived(t *testing.T) { } } -func TestInclusionEMA(t *testing.T) { +// TestNotifyAcceptedFirstDelivererWins verifies duplicate accepts +// preserve the original deliverer. +func TestNotifyAcceptedFirstDelivererWins(t *testing.T) { tr := New() - chain := newMockChain() - tr.Start(chain) - defer tr.Stop() - tx := makeTx(1) tr.NotifyAccepted("peerA", []common.Hash{tx.Hash()}) + tr.NotifyAccepted("peerB", []common.Hash{tx.Hash()}) - // Block 1 includes peerA's tx. - chain.addBlock(1, []*types.Transaction{tx}) - chain.sendHead(1) - waitStep(t, tr) - - stats := tr.GetAllPeerStats() - if stats["peerA"].RecentIncluded <= 0 { - t.Fatalf("expected RecentIncluded > 0 after inclusion, got %f", stats["peerA"].RecentIncluded) + tr.mu.Lock() + defer tr.mu.Unlock() + if got := tr.txs[tx.Hash()]; got != "peerA" { + t.Fatalf("expected first deliverer peerA to win, got %q", got) } - ema1 := stats["peerA"].RecentIncluded - - // Block 2 has no txs from peerA — EMA should decay. - chain.addBlock(2, nil) - chain.sendHead(2) - waitStep(t, tr) - - stats = tr.GetAllPeerStats() - if stats["peerA"].RecentIncluded >= ema1 { - t.Fatalf("expected EMA to decay, got %f >= %f", stats["peerA"].RecentIncluded, ema1) + if len(tr.order) != 1 { + t.Fatalf("expected single order entry, got %d", len(tr.order)) } } -func TestFinalization(t *testing.T) { +// TestHandleChainHeadEmitsInclusions verifies the tracker emits a +// correct per-peer inclusion map to its consumer when a head block +// contains tracked transactions. +func TestHandleChainHeadEmitsInclusions(t *testing.T) { tr := New() chain := newMockChain() - tr.Start(chain) + consumer := &mockConsumer{} + tr.Start(chain, consumer) defer tr.Stop() - tx := makeTx(1) - tr.NotifyAccepted("peerA", []common.Hash{tx.Hash()}) - - // Include in block 1. - chain.addBlock(1, []*types.Transaction{tx}) - chain.sendHead(1) - waitStep(t, tr) - - // Not finalized yet. - stats := tr.GetAllPeerStats() - if stats["peerA"].RecentFinalized != 0 { - t.Fatalf("expected RecentFinalized=0 before finalization, got %f", stats["peerA"].RecentFinalized) - } - - // Finalize block 1, then send head 2 to trigger the finalization EMA update. - chain.setFinalBlock(1) - chain.addBlock(2, nil) - chain.sendHead(2) - waitStep(t, tr) - - stats = tr.GetAllPeerStats() - if stats["peerA"].RecentFinalized <= 0 { - t.Fatalf("expected RecentFinalized>0 after finalization, got %f", stats["peerA"].RecentFinalized) - } -} - -func TestMultiplePeers(t *testing.T) { - tr := New() - chain := newMockChain() - tr.Start(chain) - defer tr.Stop() - - tx1 := makeTx(1) - tx2 := makeTx(2) + tx1, tx2 := makeTx(1), makeTx(2) tr.NotifyAccepted("peerA", []common.Hash{tx1.Hash()}) tr.NotifyAccepted("peerB", []common.Hash{tx2.Hash()}) - // Both included in block 1. chain.addBlock(1, []*types.Transaction{tx1, tx2}) chain.sendHead(1) waitStep(t, tr) - // Finalize. + sig := consumer.last() + if sig.inclusions["peerA"] != 1 { + t.Errorf("peerA inclusions: got %d, want 1", sig.inclusions["peerA"]) + } + if sig.inclusions["peerB"] != 1 { + t.Errorf("peerB inclusions: got %d, want 1", sig.inclusions["peerB"]) + } + if len(sig.finalized) != 0 { + t.Errorf("expected empty finalized map, got %v", sig.finalized) + } +} + +// TestHandleChainHeadEmptyBlock verifies an empty head block emits an +// empty inclusion map (so peerstats can decay all known peers). +func TestHandleChainHeadEmptyBlock(t *testing.T) { + tr := New() + chain := newMockChain() + consumer := &mockConsumer{} + tr.Start(chain, consumer) + defer tr.Stop() + + chain.addBlock(1, nil) + chain.sendHead(1) + waitStep(t, tr) + + sig := consumer.last() + if len(sig.inclusions) != 0 { + t.Errorf("expected empty inclusions, got %v", sig.inclusions) + } +} + +// TestHandleChainHeadEmitsFinalization verifies that when finalization +// advances, the consumer receives per-peer finalization credits +// accumulated over the newly-finalized range. +func TestHandleChainHeadEmitsFinalization(t *testing.T) { + tr := New() + chain := newMockChain() + consumer := &mockConsumer{} + tr.Start(chain, consumer) + defer tr.Stop() + + tx := makeTx(1) + tr.NotifyAccepted("peerA", []common.Hash{tx.Hash()}) + + // Include in block 1, not yet finalized. + chain.addBlock(1, []*types.Transaction{tx}) + chain.sendHead(1) + waitStep(t, tr) + + if credits := consumer.last().finalized["peerA"]; credits != 0 { + t.Fatalf("expected no finalization credits before finalization, got %d", credits) + } + + // Finalize block 1; next head triggers the finalization scan. chain.setFinalBlock(1) chain.addBlock(2, nil) chain.sendHead(2) waitStep(t, tr) - stats := tr.GetAllPeerStats() - if stats["peerA"].RecentFinalized <= 0 { - t.Fatalf("peerA: expected RecentFinalized>0, got %f", stats["peerA"].RecentFinalized) - } - if stats["peerB"].RecentFinalized <= 0 { - t.Fatalf("peerB: expected RecentFinalized>0, got %f", stats["peerB"].RecentFinalized) + if credits := consumer.last().finalized["peerA"]; credits != 1 { + t.Fatalf("expected 1 finalization credit, got %d", credits) } } -func TestFirstDelivererWins(t *testing.T) { - tr := New() - chain := newMockChain() - tr.Start(chain) - defer tr.Stop() - - tx := makeTx(1) - tr.NotifyAccepted("peerA", []common.Hash{tx.Hash()}) - tr.NotifyAccepted("peerB", []common.Hash{tx.Hash()}) // duplicate, should be ignored - - chain.addBlock(1, []*types.Transaction{tx}) - chain.sendHead(1) - waitStep(t, tr) - - chain.setFinalBlock(1) - chain.addBlock(2, nil) - chain.sendHead(2) - waitStep(t, tr) - - stats := tr.GetAllPeerStats() - if stats["peerA"].RecentFinalized <= 0 { - t.Fatalf("peerA should be credited, got RecentFinalized=%f", stats["peerA"].RecentFinalized) - } - if stats["peerB"].RecentFinalized != 0 { - t.Fatalf("peerB should NOT be credited, got RecentFinalized=%f", stats["peerB"].RecentFinalized) - } -} - -func TestNoFinalizationCredit(t *testing.T) { - tr := New() - chain := newMockChain() - tr.Start(chain) - defer tr.Stop() - - tx := makeTx(1) - tr.NotifyAccepted("peerA", []common.Hash{tx.Hash()}) - - // Include but don't finalize. - chain.addBlock(1, []*types.Transaction{tx}) - chain.sendHead(1) - waitStep(t, tr) - - // Send more heads without finalization. - chain.addBlock(2, nil) - chain.sendHead(2) - waitStep(t, tr) - - stats := tr.GetAllPeerStats() - if stats["peerA"].RecentFinalized != 0 { - t.Fatalf("expected RecentFinalized=0 without finalization, got %f", stats["peerA"].RecentFinalized) - } -} - -func TestEMADecay(t *testing.T) { - tr := New() - chain := newMockChain() - tr.Start(chain) - defer tr.Stop() - - tx := makeTx(1) - tr.NotifyAccepted("peerA", []common.Hash{tx.Hash()}) - - // Include in block 1. - chain.addBlock(1, []*types.Transaction{tx}) - chain.sendHead(1) - waitStep(t, tr) - - // Send 30 empty blocks — EMA should decay close to zero. - for i := uint64(2); i <= 31; i++ { - chain.addBlock(i, nil) - chain.sendHead(i) - waitStep(t, tr) - } - - stats := tr.GetAllPeerStats() - if stats["peerA"].RecentIncluded > 0.02 { - t.Fatalf("expected RecentIncluded near zero after 30 empty blocks, got %f", stats["peerA"].RecentIncluded) - } -} - -// TestReorgSafety verifies that handleChainHead resolves the head block by -// HASH (not just by number), so a head event announcing a sibling block at -// the same height does not credit transactions from the canonical block. -// -// Regression check: if the tracker were changed to use GetBlockByNumber, -// it would always fetch the canonical block A and credit peerA even when -// the head points to sibling B. +// TestReorgSafety verifies the tracker resolves the head block by HASH +// so a head event pointing at a sibling block does not emit inclusions +// from the canonical block at the same height. func TestReorgSafety(t *testing.T) { tr := New() chain := newMockChain() - tr.Start(chain) + consumer := &mockConsumer{} + tr.Start(chain, consumer) defer tr.Stop() tx := makeTx(1) @@ -395,164 +339,30 @@ func TestReorgSafety(t *testing.T) { t.Fatal("sibling blocks ended up with the same hash") } - // Head announces sibling B. A hash-aware tracker fetches B, sees no - // peerA txs, and leaves the EMA at zero. A number-only tracker would - // instead fetch A and credit peerA. + // Head announces sibling B — emit must contain no peerA inclusions. chain.sendHeadBlock(blockB) waitStep(t, tr) - - if got := tr.GetAllPeerStats()["peerA"].RecentIncluded; got != 0 { - t.Fatalf("expected RecentIncluded=0 after sibling-B head event, got %f (tracker followed the wrong block)", got) + if incl := consumer.last().inclusions["peerA"]; incl != 0 { + t.Fatalf("sibling-B head should emit 0 peerA inclusions, got %d", incl) } - // Now announce canonical A; peerA should be credited. + // Head announces canonical A — emit must contain 1 peerA inclusion. chain.sendHeadBlock(blockA) waitStep(t, tr) - - if got := tr.GetAllPeerStats()["peerA"].RecentIncluded; got <= 0 { - t.Fatalf("expected RecentIncluded>0 after canonical-A head event, got %f", got) + if incl := consumer.last().inclusions["peerA"]; incl != 1 { + t.Fatalf("canonical-A head should emit 1 peerA inclusion, got %d", incl) } } -// TestRecentFinalizedDecays verifies that the finalization EMA decays -// for a peer that earned credits in the past but has no new -// finalization activity. The decay is slow (α=0.0001), so we -// just assert monotonic decrease, not convergence to zero. -func TestRecentFinalizedDecays(t *testing.T) { +// TestHandleChainHeadNilConsumer verifies the tracker tolerates a nil +// consumer (useful for tests that only exercise tx-lifecycle behavior). +func TestHandleChainHeadNilConsumer(t *testing.T) { tr := New() chain := newMockChain() - tr.Start(chain) + tr.Start(chain, nil) defer tr.Stop() - tx := makeTx(1) - tr.NotifyAccepted("peerA", []common.Hash{tx.Hash()}) - - // Include and finalize in block 1. - chain.addBlock(1, []*types.Transaction{tx}) + chain.addBlock(1, nil) chain.sendHead(1) - waitStep(t, tr) - chain.setFinalBlock(1) - chain.addBlock(2, nil) - chain.sendHead(2) - waitStep(t, tr) - - peak := tr.GetAllPeerStats()["peerA"].RecentFinalized - if peak <= 0 { - t.Fatalf("expected RecentFinalized>0 after finalization, got %f", peak) - } - - // Send many empty heads — peer contributes zero each block, - // EMA should decay monotonically. - for i := uint64(3); i <= 50; i++ { - chain.addBlock(i, nil) - chain.sendHead(i) - waitStep(t, tr) - } - - after := tr.GetAllPeerStats()["peerA"].RecentFinalized - if after >= peak { - t.Fatalf("expected RecentFinalized to decay, got %f >= peak %f", after, peak) - } -} - -// TestRequestLatencyFirstSampleBootstrap asserts that the first latency -// sample seeds the EMA directly (no slow ramp-up from zero), and that the -// sample counter starts at 1. -func TestRequestLatencyFirstSampleBootstrap(t *testing.T) { - tr := New() - tr.NotifyRequestLatency("peerA", 200*time.Millisecond) - - stats := tr.GetAllPeerStats() - ps := stats["peerA"] - if ps.RequestLatencyEMA != 200*time.Millisecond { - t.Fatalf("expected first sample to seed EMA at 200ms, got %v", ps.RequestLatencyEMA) - } - if ps.RequestSamples != 1 { - t.Fatalf("expected RequestSamples=1, got %d", ps.RequestSamples) - } -} - -// TestRequestLatencyEMAUpdate verifies the EMA formula (1-α)·old + α·new. -func TestRequestLatencyEMAUpdate(t *testing.T) { - tr := New() - tr.NotifyRequestLatency("peerA", 100*time.Millisecond) - tr.NotifyRequestLatency("peerA", 1000*time.Millisecond) - - // Expected: 0.99*100ms + 0.01*1000ms = 109ms - got := tr.GetAllPeerStats()["peerA"].RequestLatencyEMA - want := 109 * time.Millisecond - delta := got - want - if delta < 0 { - delta = -delta - } - if delta > 1*time.Microsecond { - t.Fatalf("EMA mismatch: got %v, want %v (delta %v)", got, want, delta) - } - if samples := tr.GetAllPeerStats()["peerA"].RequestSamples; samples != 2 { - t.Fatalf("expected RequestSamples=2, got %d", samples) - } -} - -// TestRequestLatencySlowEMAConvergence verifies that the slow alpha -// requires many samples to noticeably shift the EMA. Starting at 100ms -// and feeding 5s (timeout) samples, the EMA should still be well below -// 1s after 50 samples. -func TestRequestLatencySlowEMAConvergence(t *testing.T) { - tr := New() - tr.NotifyRequestLatency("peerA", 100*time.Millisecond) - for i := 0; i < 50; i++ { - tr.NotifyRequestLatency("peerA", 5*time.Second) - } - got := tr.GetAllPeerStats()["peerA"].RequestLatencyEMA - if got < 1*time.Second { - // Expected ≈ (0.99)^50 * 100ms + (1-(0.99)^50) * 5s ≈ 1.99s - // The lower bound proves a meaningful shift; the upper bound (below) - // proves the slow alpha damped the convergence. - t.Fatalf("EMA did not move enough under sustained timeouts, got %v", got) - } - if got > 3*time.Second { - t.Fatalf("EMA converged too fast for slow alpha=0.01, got %v", got) - } -} - -// TestRequestLatencyMultiplePeersIsolated verifies per-peer isolation: a -// sample for peerA does not affect peerB's stats. -func TestRequestLatencyMultiplePeersIsolated(t *testing.T) { - tr := New() - tr.NotifyRequestLatency("peerA", 100*time.Millisecond) - tr.NotifyRequestLatency("peerB", 5*time.Second) - - stats := tr.GetAllPeerStats() - if stats["peerA"].RequestLatencyEMA != 100*time.Millisecond { - t.Errorf("peerA EMA: got %v, want 100ms", stats["peerA"].RequestLatencyEMA) - } - if stats["peerB"].RequestLatencyEMA != 5*time.Second { - t.Errorf("peerB EMA: got %v, want 5s", stats["peerB"].RequestLatencyEMA) - } - if stats["peerA"].RequestSamples != 1 || stats["peerB"].RequestSamples != 1 { - t.Errorf("expected RequestSamples=1 for each peer, got A=%d B=%d", - stats["peerA"].RequestSamples, stats["peerB"].RequestSamples) - } -} - -// TestRequestLatencyPeerDropResetsStats verifies that NotifyPeerDrop -// removes the peer's latency history along with its other stats. -func TestRequestLatencyPeerDropResetsStats(t *testing.T) { - tr := New() - tr.NotifyRequestLatency("peerA", 200*time.Millisecond) - tr.NotifyPeerDrop("peerA") - - if _, ok := tr.GetAllPeerStats()["peerA"]; ok { - t.Fatal("peerA stats should be removed after NotifyPeerDrop") - } - - // A subsequent latency sample re-creates the entry as a fresh peer. - tr.NotifyRequestLatency("peerA", 50*time.Millisecond) - ps := tr.GetAllPeerStats()["peerA"] - if ps.RequestSamples != 1 { - t.Fatalf("expected RequestSamples=1 after re-add, got %d", ps.RequestSamples) - } - if ps.RequestLatencyEMA != 50*time.Millisecond { - t.Fatalf("expected fresh EMA bootstrap, got %v", ps.RequestLatencyEMA) - } + waitStep(t, tr) // should not panic } From b178ec9a4a923188e8644a314cb71f36ad611cdf Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Thu, 16 Apr 2026 00:23:13 +0200 Subject: [PATCH 35/38] eth/peerstats: bump MinLatencySamples from 10 to 100 Require substantially more samples before a peer's request-latency EMA becomes eligible for protection. A 10-sample floor was too low: a peer hitting 10 fast replies in a short burst could earn protection before the slow alpha=0.01 EMA had moved meaningfully away from the bootstrap value. At ~70-sample EMA half-life, a 100-sample floor means the EMA has been refined through several half-lives before it can affect dropping decisions. Updates the dropper tests that previously used RequestSamples=50 to use peerstats.MinLatencySamples so they stay robust to future value changes. Design notes and a test comment reference the new value. --- eth/dropper_test.go | 10 +++++----- eth/peerstats/peerstats.go | 2 +- eth/peerstats/peerstats_test.go | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/eth/dropper_test.go b/eth/dropper_test.go index e2ed638322..90333d97d5 100644 --- a/eth/dropper_test.go +++ b/eth/dropper_test.go @@ -243,15 +243,15 @@ func TestProtectedByPoolRequestLatencyBasic(t *testing.T) { // Three peers have enough samples; the two fastest should win. stats[dialed[0].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 50 * time.Millisecond, - RequestSamples: 50, + RequestSamples: peerstats.MinLatencySamples, } stats[dialed[1].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 100 * time.Millisecond, - RequestSamples: 50, + RequestSamples: peerstats.MinLatencySamples, } stats[dialed[2].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 2 * time.Second, - RequestSamples: 50, + RequestSamples: peerstats.MinLatencySamples, } protected := protectedPeersByPool(nil, dialed, stats) @@ -313,7 +313,7 @@ func TestProtectedByPoolRequestLatencyPerPool(t *testing.T) { for _, p := range inbound { stats[p.ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 50 * time.Millisecond, - RequestSamples: 50, + RequestSamples: peerstats.MinLatencySamples, } } // Dialed peers are slower (1s) — globally they would all lose, but @@ -321,7 +321,7 @@ func TestProtectedByPoolRequestLatencyPerPool(t *testing.T) { for _, p := range dialed { stats[p.ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 1 * time.Second, - RequestSamples: 50, + RequestSamples: peerstats.MinLatencySamples, } } diff --git a/eth/peerstats/peerstats.go b/eth/peerstats/peerstats.go index 567a27309a..ab02fe9705 100644 --- a/eth/peerstats/peerstats.go +++ b/eth/peerstats/peerstats.go @@ -52,7 +52,7 @@ const ( // MinLatencySamples is the number of latency samples a peer must accumulate // before its RequestLatencyEMA is considered meaningful for protection. // Prevents a single lucky-fast reply from displacing established peers. - MinLatencySamples = 10 + MinLatencySamples = 100 ) // PeerStats is the exported per-peer snapshot returned by GetAllPeerStats. diff --git a/eth/peerstats/peerstats_test.go b/eth/peerstats/peerstats_test.go index 42d3bfa385..2a8f89a0eb 100644 --- a/eth/peerstats/peerstats_test.go +++ b/eth/peerstats/peerstats_test.go @@ -181,7 +181,7 @@ func TestNotifyPeerDropClearsStats(t *testing.T) { // TestStaleRequestLatencyAfterDrop documents the accepted behavior: a // late sample after NotifyPeerDrop recreates a 1-sample entry. The -// dropper's MinLatencySamples=10 guard ensures this is harmless. +// dropper's MinLatencySamples=100 guard ensures this is harmless. func TestStaleRequestLatencyAfterDrop(t *testing.T) { s := New() s.NotifyRequestLatency("peerA", 200*time.Millisecond) From 89222edba997fb3a927f3739e9b8b39b03b0e2d1 Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Thu, 16 Apr 2026 01:07:58 +0200 Subject: [PATCH 36/38] eth/peerstats: gate latency protection on sample freshness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The request-latency category scores peers by the reciprocal of their RequestLatencyEMA, but that EMA is only updated by NotifyRequestLatency — which only fires when the tx fetcher sends a request to the peer. A peer can serve a burst of fast replies to build a strong EMA, stop announcing transactions so we never request from them again, and retain latency protection indefinitely with a frozen score. Record LastLatencySample (wall-clock time) per peer alongside the EMA update. In the dropper's scoring function, return 0 when the last sample is older than MaxLatencyStaleness (10 minutes). Fresh samples reset the clock, so peers that resume activity become eligible again. Timestamps rather than block counts: real-time is what we actually care about (10 minutes idle), not a block count that varies with chain pace, and the EMA itself is a time.Duration so measuring staleness in the same domain stays consistent. Tests cover the timestamp update on NotifyRequestLatency, the timestamp advancing on successive samples, and the dropper rejecting a stale peer whose EMA and sample count would otherwise qualify. --- eth/dropper.go | 6 ++++++ eth/dropper_test.go | 38 +++++++++++++++++++++++++++++++++ eth/peerstats/peerstats.go | 11 ++++++++++ eth/peerstats/peerstats_test.go | 31 +++++++++++++++++++++++++++ 4 files changed, 86 insertions(+) diff --git a/eth/dropper.go b/eth/dropper.go index 0af4b97047..3d370c0733 100644 --- a/eth/dropper.go +++ b/eth/dropper.go @@ -85,6 +85,12 @@ var protectionCategories = []protectionCategory{ if s.RequestSamples < peerstats.MinLatencySamples { return 0 } + // Freshness gate: a peer that earned a fast EMA but then went + // silent on announcements (no requests → no fresh samples) must + // not keep that score indefinitely. Ignore stale data. + if time.Since(s.LastLatencySample) > peerstats.MaxLatencyStaleness { + return 0 + } if s.RequestLatencyEMA <= 0 { return 0 } diff --git a/eth/dropper_test.go b/eth/dropper_test.go index 90333d97d5..63b431de22 100644 --- a/eth/dropper_test.go +++ b/eth/dropper_test.go @@ -244,14 +244,17 @@ func TestProtectedByPoolRequestLatencyBasic(t *testing.T) { stats[dialed[0].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 50 * time.Millisecond, RequestSamples: peerstats.MinLatencySamples, + LastLatencySample: time.Now(), } stats[dialed[1].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 100 * time.Millisecond, RequestSamples: peerstats.MinLatencySamples, + LastLatencySample: time.Now(), } stats[dialed[2].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 2 * time.Second, RequestSamples: peerstats.MinLatencySamples, + LastLatencySample: time.Now(), } protected := protectedPeersByPool(nil, dialed, stats) @@ -285,6 +288,7 @@ func TestProtectedByPoolRequestLatencyBootstrapGuard(t *testing.T) { stats[dialed[1].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 500 * time.Millisecond, RequestSamples: peerstats.MinLatencySamples, + LastLatencySample: time.Now(), } protected := protectedPeersByPool(nil, dialed, stats) @@ -314,6 +318,7 @@ func TestProtectedByPoolRequestLatencyPerPool(t *testing.T) { stats[p.ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 50 * time.Millisecond, RequestSamples: peerstats.MinLatencySamples, + LastLatencySample: time.Now(), } } // Dialed peers are slower (1s) — globally they would all lose, but @@ -322,6 +327,7 @@ func TestProtectedByPoolRequestLatencyPerPool(t *testing.T) { stats[p.ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 1 * time.Second, RequestSamples: peerstats.MinLatencySamples, + LastLatencySample: time.Now(), } } @@ -338,3 +344,35 @@ func TestProtectedByPoolRequestLatencyPerPool(t *testing.T) { t.Fatalf("expected 2 dialed peers protected by per-pool top-N, got %d", dialedProtected) } } + +// TestProtectedByPoolRequestLatencyStale verifies that the freshness gate +// excludes peers whose latency EMA is valid (meeting the sample count and +// fast value) but whose last sample is older than MaxLatencyStaleness. +// A peer cannot serve a burst of fast replies, go silent on announcements, +// and keep latency-based protection indefinitely. +func TestProtectedByPoolRequestLatencyStale(t *testing.T) { + dialed := makePeers(20) + stats := make(map[string]peerstats.PeerStats) + // Fresh, fast peer — should be protected. + stats[dialed[0].ID().String()] = peerstats.PeerStats{ + RequestLatencyEMA: 50 * time.Millisecond, + RequestSamples: peerstats.MinLatencySamples, + LastLatencySample: time.Now(), + } + // Stale, fast peer — was fast, but hasn't answered in too long. + // Same EMA and sample count as the fresh peer; only staleness differs. + stats[dialed[1].ID().String()] = peerstats.PeerStats{ + RequestLatencyEMA: 50 * time.Millisecond, + RequestSamples: peerstats.MinLatencySamples, + LastLatencySample: time.Now().Add(-2 * peerstats.MaxLatencyStaleness), + } + + protected := protectedPeersByPool(nil, dialed, stats) + + if !protected[dialed[0]] { + t.Error("fresh fast peer must be protected") + } + if protected[dialed[1]] { + t.Error("stale peer must NOT keep latency protection despite fast EMA") + } +} diff --git a/eth/peerstats/peerstats.go b/eth/peerstats/peerstats.go index ab02fe9705..1d0d3d36bc 100644 --- a/eth/peerstats/peerstats.go +++ b/eth/peerstats/peerstats.go @@ -53,6 +53,13 @@ const ( // before its RequestLatencyEMA is considered meaningful for protection. // Prevents a single lucky-fast reply from displacing established peers. MinLatencySamples = 100 + // MaxLatencyStaleness is the oldest allowed age of a peer's last + // latency sample before their RequestLatencyEMA is disregarded for + // protection. Prevents a peer from earning a fast score during a + // burst of activity and then holding protection indefinitely by + // going silent on tx announcements (no further requests → no fresh + // samples → EMA frozen at its last value). + MaxLatencyStaleness = 10 * time.Minute ) // PeerStats is the exported per-peer snapshot returned by GetAllPeerStats. @@ -61,6 +68,7 @@ type PeerStats struct { RecentIncluded float64 // EMA of per-block inclusions (fast) RequestLatencyEMA time.Duration // Slow EMA of tx-request response latency (timeouts count as the timeout value) RequestSamples int64 // Number of latency samples seen (for bootstrap guard) + LastLatencySample time.Time // Wall-clock time of the most recent latency sample (for staleness gate) } // peerStats is the internal mutable state per peer. @@ -69,6 +77,7 @@ type peerStats struct { recentIncluded float64 requestLatencyEMA time.Duration requestSamples int64 + lastLatencySample time.Time } // Stats is the per-peer quality aggregator. @@ -141,6 +150,7 @@ func (s *Stats) NotifyRequestLatency(peer string, latency time.Duration) { ) } ps.requestSamples++ + ps.lastLatencySample = time.Now() } // NotifyPeerDrop removes a peer's stats on disconnect. A rare stale @@ -166,6 +176,7 @@ func (s *Stats) GetAllPeerStats() map[string]PeerStats { RecentIncluded: ps.recentIncluded, RequestLatencyEMA: ps.requestLatencyEMA, RequestSamples: ps.requestSamples, + LastLatencySample: ps.lastLatencySample, } } return result diff --git a/eth/peerstats/peerstats_test.go b/eth/peerstats/peerstats_test.go index 2a8f89a0eb..cfa894512f 100644 --- a/eth/peerstats/peerstats_test.go +++ b/eth/peerstats/peerstats_test.go @@ -221,3 +221,34 @@ func TestMultiplePeersIsolated(t *testing.T) { t.Errorf("peerB latency: got %v, want 5s", stats["peerB"].RequestLatencyEMA) } } + +// TestLatencyTimestampSet verifies that NotifyRequestLatency stamps the +// peer's LastLatencySample with approximately time.Now(). +func TestLatencyTimestampSet(t *testing.T) { + s := New() + before := time.Now() + s.NotifyRequestLatency("peerA", 100*time.Millisecond) + after := time.Now() + + got := s.GetAllPeerStats()["peerA"].LastLatencySample + if got.Before(before) || got.After(after) { + t.Fatalf("LastLatencySample = %v not in [%v, %v]", got, before, after) + } +} + +// TestLatencyTimestampUpdatesOnEachSample verifies that a later +// NotifyRequestLatency call advances LastLatencySample. +func TestLatencyTimestampUpdatesOnEachSample(t *testing.T) { + s := New() + s.NotifyRequestLatency("peerA", 100*time.Millisecond) + first := s.GetAllPeerStats()["peerA"].LastLatencySample + + // Small sleep so the second timestamp is detectably later. + time.Sleep(2 * time.Millisecond) + s.NotifyRequestLatency("peerA", 200*time.Millisecond) + second := s.GetAllPeerStats()["peerA"].LastLatencySample + + if !second.After(first) { + t.Fatalf("expected second sample timestamp > first, got first=%v second=%v", first, second) + } +} From b6b6345be900c7f5426981a8adcd93ec718bdbeb Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Thu, 16 Apr 2026 15:16:29 +0200 Subject: [PATCH 37/38] eth: rename NotifyRequestLatency to NotifyRequestResult, track success/timeout counts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace NotifyRequestLatency(peer, latency) with NotifyRequestResult(peer, latency, timeout). The new timeout bool tells peerstats whether the request was answered or timed out. Per-peer RequestSuccesses and RequestTimeouts counters replace the single RequestSamples field — any two of the three are derivable, so we keep the two primary counters and derive the total (successes + timeouts) where needed (e.g. the MinLatencySamples guard in the dropper). The latency EMA continues to use the timeout value for timed-out requests, penalizing slow peers as before. The success/timeout counters are exposed as statistics only — no protection category uses them yet. --- eth/dropper.go | 2 +- eth/dropper_test.go | 18 +++---- eth/fetcher/tx_fetcher.go | 18 +++---- eth/fetcher/tx_fetcher_test.go | 59 ++++++++++----------- eth/handler.go | 2 +- eth/peerstats/peerstats.go | 38 +++++++++----- eth/peerstats/peerstats_test.go | 93 +++++++++++++++++++++++---------- 7 files changed, 139 insertions(+), 91 deletions(-) diff --git a/eth/dropper.go b/eth/dropper.go index 3d370c0733..0982dba91d 100644 --- a/eth/dropper.go +++ b/eth/dropper.go @@ -82,7 +82,7 @@ var protectionCategories = []protectionCategory{ // whose EMA reaches the timeout also score 0 by this path because // the reciprocal of a very large duration is tiny but positive; the // per-pool top-N will still push faster peers ahead of them. - if s.RequestSamples < peerstats.MinLatencySamples { + if s.RequestSuccesses+s.RequestTimeouts < peerstats.MinLatencySamples { return 0 } // Freshness gate: a peer that earned a fast EMA but then went diff --git a/eth/dropper_test.go b/eth/dropper_test.go index 63b431de22..c56f293166 100644 --- a/eth/dropper_test.go +++ b/eth/dropper_test.go @@ -243,17 +243,17 @@ func TestProtectedByPoolRequestLatencyBasic(t *testing.T) { // Three peers have enough samples; the two fastest should win. stats[dialed[0].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 50 * time.Millisecond, - RequestSamples: peerstats.MinLatencySamples, + RequestSuccesses: peerstats.MinLatencySamples, LastLatencySample: time.Now(), } stats[dialed[1].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 100 * time.Millisecond, - RequestSamples: peerstats.MinLatencySamples, + RequestSuccesses: peerstats.MinLatencySamples, LastLatencySample: time.Now(), } stats[dialed[2].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 2 * time.Second, - RequestSamples: peerstats.MinLatencySamples, + RequestSuccesses: peerstats.MinLatencySamples, LastLatencySample: time.Now(), } @@ -282,12 +282,12 @@ func TestProtectedByPoolRequestLatencyBootstrapGuard(t *testing.T) { // A lucky-fast peer with only 1 sample — must NOT be protected. stats[dialed[0].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 1 * time.Millisecond, - RequestSamples: 1, + RequestSuccesses: 1, } // A warmed-up but slower peer — should be protected on latency. stats[dialed[1].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 500 * time.Millisecond, - RequestSamples: peerstats.MinLatencySamples, + RequestSuccesses: peerstats.MinLatencySamples, LastLatencySample: time.Now(), } @@ -317,7 +317,7 @@ func TestProtectedByPoolRequestLatencyPerPool(t *testing.T) { for _, p := range inbound { stats[p.ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 50 * time.Millisecond, - RequestSamples: peerstats.MinLatencySamples, + RequestSuccesses: peerstats.MinLatencySamples, LastLatencySample: time.Now(), } } @@ -326,7 +326,7 @@ func TestProtectedByPoolRequestLatencyPerPool(t *testing.T) { for _, p := range dialed { stats[p.ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 1 * time.Second, - RequestSamples: peerstats.MinLatencySamples, + RequestSuccesses: peerstats.MinLatencySamples, LastLatencySample: time.Now(), } } @@ -356,14 +356,14 @@ func TestProtectedByPoolRequestLatencyStale(t *testing.T) { // Fresh, fast peer — should be protected. stats[dialed[0].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 50 * time.Millisecond, - RequestSamples: peerstats.MinLatencySamples, + RequestSuccesses: peerstats.MinLatencySamples, LastLatencySample: time.Now(), } // Stale, fast peer — was fast, but hasn't answered in too long. // Same EMA and sample count as the fresh peer; only staleness differs. stats[dialed[1].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 50 * time.Millisecond, - RequestSamples: peerstats.MinLatencySamples, + RequestSuccesses: peerstats.MinLatencySamples, LastLatencySample: time.Now().Add(-2 * peerstats.MaxLatencyStaleness), } diff --git a/eth/fetcher/tx_fetcher.go b/eth/fetcher/tx_fetcher.go index ffed7d5acf..6aba3b1f23 100644 --- a/eth/fetcher/tx_fetcher.go +++ b/eth/fetcher/tx_fetcher.go @@ -185,7 +185,7 @@ type TxFetcher struct { fetchTxs func(string, []common.Hash) error // Retrieves a set of txs from a remote peer dropPeer func(string) // Drops a peer in case of announcement violation onAccepted func(peer string, hashes []common.Hash) // Optional: notified with accepted tx hashes per peer - onRequestLatency func(peer string, latency time.Duration) // Optional: notified once per completed/timed-out tx request + onRequestResult func(peer string, latency time.Duration, timeout bool) // Optional: notified once per completed/timed-out tx request step chan struct{} // Notification channel when the fetcher loop iterates clock mclock.Clock // Monotonic clock or simulated clock for tests @@ -196,15 +196,15 @@ type TxFetcher struct { // NewTxFetcher creates a transaction fetcher to retrieve transaction // based on hash announcements. // Chain can be nil to disable on-chain checks. -func NewTxFetcher(chain *core.BlockChain, validateMeta func(common.Hash, byte) error, addTxs func([]*types.Transaction) []error, fetchTxs func(string, []common.Hash) error, dropPeer func(string), onAccepted func(string, []common.Hash), onRequestLatency func(string, time.Duration)) *TxFetcher { - return NewTxFetcherForTests(chain, validateMeta, addTxs, fetchTxs, dropPeer, onAccepted, onRequestLatency, mclock.System{}, time.Now, nil) +func NewTxFetcher(chain *core.BlockChain, validateMeta func(common.Hash, byte) error, addTxs func([]*types.Transaction) []error, fetchTxs func(string, []common.Hash) error, dropPeer func(string), onAccepted func(string, []common.Hash), onRequestResult func(string, time.Duration, bool)) *TxFetcher { + return NewTxFetcherForTests(chain, validateMeta, addTxs, fetchTxs, dropPeer, onAccepted, onRequestResult, mclock.System{}, time.Now, nil) } // NewTxFetcherForTests is a testing method to mock out the realtime clock with // a simulated version and the internal randomness with a deterministic one. // Chain can be nil to disable on-chain checks. func NewTxFetcherForTests( - chain *core.BlockChain, validateMeta func(common.Hash, byte) error, addTxs func([]*types.Transaction) []error, fetchTxs func(string, []common.Hash) error, dropPeer func(string), onAccepted func(string, []common.Hash), onRequestLatency func(string, time.Duration), + chain *core.BlockChain, validateMeta func(common.Hash, byte) error, addTxs func([]*types.Transaction) []error, fetchTxs func(string, []common.Hash) error, dropPeer func(string), onAccepted func(string, []common.Hash), onRequestResult func(string, time.Duration, bool), clock mclock.Clock, realTime func() time.Time, rand *mrand.Rand) *TxFetcher { return &TxFetcher{ notify: make(chan *txAnnounce), @@ -227,7 +227,7 @@ func NewTxFetcherForTests( fetchTxs: fetchTxs, dropPeer: dropPeer, onAccepted: onAccepted, - onRequestLatency: onRequestLatency, + onRequestResult: onRequestResult, clock: clock, realTime: realTime, rand: rand, @@ -680,8 +680,8 @@ func (f *TxFetcher) loop() { // itself, so a peer that times out repeatedly drags its // score down without us having to wait for an eventual // (possibly never-arriving) reply. - if f.onRequestLatency != nil { - f.onRequestLatency(peer, txFetchTimeout) + if f.onRequestResult != nil { + f.onRequestResult(peer, txFetchTimeout, true) } } } @@ -781,9 +781,9 @@ func (f *TxFetcher) loop() { txFetcherSlowWait.Update(time.Duration(f.clock.Now() - req.time).Nanoseconds()) // Already counted as a timeout sample at the timeout site; // don't double-record on eventual delivery. - } else if f.onRequestLatency != nil { + } else if f.onRequestResult != nil { // Normal in-time delivery. Record the actual round-trip. - f.onRequestLatency(delivery.origin, time.Duration(f.clock.Now()-req.time)) + f.onRequestResult(delivery.origin, time.Duration(f.clock.Now()-req.time), false) } delete(f.requests, delivery.origin) diff --git a/eth/fetcher/tx_fetcher_test.go b/eth/fetcher/tx_fetcher_test.go index 92f3979acb..2b4f156420 100644 --- a/eth/fetcher/tx_fetcher_test.go +++ b/eth/fetcher/tx_fetcher_test.go @@ -2287,53 +2287,51 @@ func TestTransactionForgotten(t *testing.T) { } } -// latencyRecorder is a thread-safe recorder for onRequestLatency callbacks. -type latencyRecorder struct { +// resultRecorder is a thread-safe recorder for onRequestResult callbacks. +type resultRecorder struct { mu sync.Mutex - samples []latencySample + samples []resultSample } -type latencySample struct { +type resultSample struct { peer string latency time.Duration + timeout bool } -func (r *latencyRecorder) record(peer string, latency time.Duration) { +func (r *resultRecorder) record(peer string, latency time.Duration, timeout bool) { r.mu.Lock() defer r.mu.Unlock() - r.samples = append(r.samples, latencySample{peer, latency}) + r.samples = append(r.samples, resultSample{peer, latency, timeout}) } -func (r *latencyRecorder) snapshot() []latencySample { +func (r *resultRecorder) snapshot() []resultSample { r.mu.Lock() defer r.mu.Unlock() - out := make([]latencySample, len(r.samples)) + out := make([]resultSample, len(r.samples)) copy(out, r.samples) return out } -// TestTransactionFetcherRequestLatencyOnDelivery asserts that an in-time -// direct delivery of a requested batch fires the onRequestLatency callback -// exactly once with the actual round-trip latency. -func TestTransactionFetcherRequestLatencyOnDelivery(t *testing.T) { - rec := &latencyRecorder{} +// TestTransactionFetcherRequestResultOnDelivery asserts that an in-time +// direct delivery fires the onRequestResult callback with timeout=false. +func TestTransactionFetcherRequestResultOnDelivery(t *testing.T) { + rec := &resultRecorder{} testTransactionFetcherParallel(t, txFetcherTest{ init: func() *TxFetcher { f := newTestTxFetcher() - f.onRequestLatency = rec.record + f.onRequestResult = rec.record return f }, steps: []interface{}{ doTxNotify{peer: "A", hashes: []common.Hash{testTxsHashes[0]}, types: []byte{testTxs[0].Type()}, sizes: []uint32{uint32(testTxs[0].Size())}}, - // Wait for the announce-arrival timer; request is dispatched at this point. doWait{time: txArriveTimeout, step: true}, - // Simulate 200ms round-trip before the response arrives. doWait{time: 200 * time.Millisecond, step: false}, doTxEnqueue{peer: "A", txs: []*types.Transaction{testTxs[0]}, direct: true}, doFunc(func() { samples := rec.snapshot() if len(samples) != 1 { - t.Fatalf("expected 1 latency sample, got %d (%v)", len(samples), samples) + t.Fatalf("expected 1 sample, got %d (%v)", len(samples), samples) } if samples[0].peer != "A" { t.Errorf("peer mismatch: got %q, want A", samples[0].peer) @@ -2341,28 +2339,28 @@ func TestTransactionFetcherRequestLatencyOnDelivery(t *testing.T) { if samples[0].latency != 200*time.Millisecond { t.Errorf("latency mismatch: got %v, want 200ms", samples[0].latency) } + if samples[0].timeout { + t.Error("expected timeout=false for delivery") + } }), }, }) } -// TestTransactionFetcherRequestLatencyOnTimeout asserts that when a request -// times out (no reply within txFetchTimeout), onRequestLatency fires once -// with the timeout value, and a subsequent (late) delivery does not fire -// a duplicate sample. -func TestTransactionFetcherRequestLatencyOnTimeout(t *testing.T) { - rec := &latencyRecorder{} +// TestTransactionFetcherRequestResultOnTimeout asserts that a timed-out +// request fires onRequestResult with timeout=true and the timeout value, +// and a subsequent (late) delivery does not fire a duplicate sample. +func TestTransactionFetcherRequestResultOnTimeout(t *testing.T) { + rec := &resultRecorder{} testTransactionFetcherParallel(t, txFetcherTest{ init: func() *TxFetcher { f := newTestTxFetcher() - f.onRequestLatency = rec.record + f.onRequestResult = rec.record return f }, steps: []interface{}{ doTxNotify{peer: "A", hashes: []common.Hash{testTxsHashes[0]}, types: []byte{testTxs[0].Type()}, sizes: []uint32{uint32(testTxs[0].Size())}}, doWait{time: txArriveTimeout, step: true}, - // Push the clock past the request deadline; the timeout handler - // should fire and record a single timeout-valued sample. doWait{time: txFetchTimeout, step: true}, doFunc(func() { samples := rec.snapshot() @@ -2375,13 +2373,14 @@ func TestTransactionFetcherRequestLatencyOnTimeout(t *testing.T) { if samples[0].latency != txFetchTimeout { t.Errorf("latency mismatch: got %v, want %v", samples[0].latency, txFetchTimeout) } + if !samples[0].timeout { + t.Error("expected timeout=true for timed-out request") + } }), - // A late reply from the slow peer must not produce a second sample. doTxEnqueue{peer: "A", txs: []*types.Transaction{testTxs[0]}, direct: true}, doFunc(func() { - samples := rec.snapshot() - if len(samples) != 1 { - t.Fatalf("late delivery double-counted latency: got %d samples, want 1", len(samples)) + if len(rec.snapshot()) != 1 { + t.Fatalf("late delivery double-counted: got %d samples, want 1", len(rec.snapshot())) } }), }, diff --git a/eth/handler.go b/eth/handler.go index 8916dad662..448cfe0a2a 100644 --- a/eth/handler.go +++ b/eth/handler.go @@ -194,7 +194,7 @@ func newHandler(config *handlerConfig) (*handler, error) { } h.txTracker = txtracker.New() h.peerStats = peerstats.New() - h.txFetcher = fetcher.NewTxFetcher(h.chain, validateMeta, addTxs, fetchTx, h.removePeer, h.txTracker.NotifyAccepted, h.peerStats.NotifyRequestLatency) + h.txFetcher = fetcher.NewTxFetcher(h.chain, validateMeta, addTxs, fetchTx, h.removePeer, h.txTracker.NotifyAccepted, h.peerStats.NotifyRequestResult) return h, nil } diff --git a/eth/peerstats/peerstats.go b/eth/peerstats/peerstats.go index 1d0d3d36bc..146226a187 100644 --- a/eth/peerstats/peerstats.go +++ b/eth/peerstats/peerstats.go @@ -26,9 +26,10 @@ // Signal sources: // - NotifyBlock(inclusions, finalized) — per-block deltas from txtracker // (computed under txtracker's own lock, then passed in after release) -// - NotifyRequestLatency(peer, latency) — per-request samples from the -// fetcher; timeouts are reported with the timeout value so slow peers -// contribute to the EMA +// - NotifyRequestResult(peer, latency, timeout) — per-request outcomes +// from the fetcher; timeouts are reported with the timeout value so +// slow peers contribute to the EMA, and the timeout flag increments +// the per-peer timeout counter // - NotifyPeerDrop(peer) — called from the handler on disconnect package peerstats @@ -67,8 +68,9 @@ type PeerStats struct { RecentFinalized float64 // EMA of per-block finalization credits (slow) RecentIncluded float64 // EMA of per-block inclusions (fast) RequestLatencyEMA time.Duration // Slow EMA of tx-request response latency (timeouts count as the timeout value) - RequestSamples int64 // Number of latency samples seen (for bootstrap guard) - LastLatencySample time.Time // Wall-clock time of the most recent latency sample (for staleness gate) + RequestSuccesses int64 // Requests answered before timeout + RequestTimeouts int64 // Requests that timed out + LastLatencySample time.Time // Wall-clock time of the most recent request result (for staleness gate) } // peerStats is the internal mutable state per peer. @@ -76,7 +78,8 @@ type peerStats struct { recentFinalized float64 recentIncluded float64 requestLatencyEMA time.Duration - requestSamples int64 + requestSuccesses int64 + requestTimeouts int64 lastLatencySample time.Time } @@ -126,11 +129,13 @@ func (s *Stats) NotifyBlock(inclusions, finalized map[string]int) { } } -// NotifyRequestLatency records a tx-request response latency sample for -// the given peer. Timeouts should be reported as the timeout value. -// Creates a peer entry if one doesn't exist (a peer may have latency -// samples before any inclusion signal). -func (s *Stats) NotifyRequestLatency(peer string, latency time.Duration) { +// NotifyRequestResult records a tx-request outcome for the given peer. +// latency is the round-trip time (for timeouts, pass the timeout value). +// timeout indicates whether the request timed out rather than receiving a +// normal delivery. Both cases update the latency EMA; the timeout flag +// additionally increments the per-peer timeout counter. +// Creates a peer entry if one doesn't exist. +func (s *Stats) NotifyRequestResult(peer string, latency time.Duration, timeout bool) { s.mu.Lock() defer s.mu.Unlock() @@ -139,7 +144,7 @@ func (s *Stats) NotifyRequestLatency(peer string, latency time.Duration) { ps = &peerStats{} s.peers[peer] = ps } - if ps.requestSamples == 0 { + if ps.requestSuccesses+ps.requestTimeouts == 0 { // Bootstrap the EMA with the first sample so it doesn't drift up // from zero over many samples before reaching realistic values. ps.requestLatencyEMA = latency @@ -149,7 +154,11 @@ func (s *Stats) NotifyRequestLatency(peer string, latency time.Duration) { float64(latency)*latencyEMAAlpha, ) } - ps.requestSamples++ + if timeout { + ps.requestTimeouts++ + } else { + ps.requestSuccesses++ + } ps.lastLatencySample = time.Now() } @@ -175,7 +184,8 @@ func (s *Stats) GetAllPeerStats() map[string]PeerStats { RecentFinalized: ps.recentFinalized, RecentIncluded: ps.recentIncluded, RequestLatencyEMA: ps.requestLatencyEMA, - RequestSamples: ps.requestSamples, + RequestSuccesses: ps.requestSuccesses, + RequestTimeouts: ps.requestTimeouts, LastLatencySample: ps.lastLatencySample, } } diff --git a/eth/peerstats/peerstats_test.go b/eth/peerstats/peerstats_test.go index cfa894512f..3b242eac15 100644 --- a/eth/peerstats/peerstats_test.go +++ b/eth/peerstats/peerstats_test.go @@ -114,26 +114,29 @@ func TestNotifyBlockInclusionEMAUpdate(t *testing.T) { } } -// TestNotifyRequestLatencyFirstSampleBootstrap asserts that the first +// TestNotifyRequestResultFirstSampleBootstrap asserts that the first // latency sample seeds the EMA directly. -func TestNotifyRequestLatencyFirstSampleBootstrap(t *testing.T) { +func TestNotifyRequestResultFirstSampleBootstrap(t *testing.T) { s := New() - s.NotifyRequestLatency("peerA", 200*time.Millisecond) + s.NotifyRequestResult("peerA", 200*time.Millisecond, false) ps := s.GetAllPeerStats()["peerA"] if ps.RequestLatencyEMA != 200*time.Millisecond { t.Fatalf("expected first sample to seed EMA at 200ms, got %v", ps.RequestLatencyEMA) } - if ps.RequestSamples != 1 { - t.Fatalf("expected RequestSamples=1, got %d", ps.RequestSamples) + if ps.RequestSuccesses != 1 { + t.Fatalf("expected RequestSuccesses=1, got %d", ps.RequestSuccesses) + } + if ps.RequestTimeouts != 0 { + t.Fatalf("expected RequestTimeouts=0, got %d", ps.RequestTimeouts) } } -// TestNotifyRequestLatencyEMAUpdate verifies the EMA formula for latency. -func TestNotifyRequestLatencyEMAUpdate(t *testing.T) { +// TestNotifyRequestResultEMAUpdate verifies the EMA formula for latency. +func TestNotifyRequestResultEMAUpdate(t *testing.T) { s := New() - s.NotifyRequestLatency("peerA", 100*time.Millisecond) - s.NotifyRequestLatency("peerA", 1000*time.Millisecond) + s.NotifyRequestResult("peerA", 100*time.Millisecond, false) + s.NotifyRequestResult("peerA", 1000*time.Millisecond, false) // Expected: 0.99*100ms + 0.01*1000ms = 109ms got := s.GetAllPeerStats()["peerA"].RequestLatencyEMA @@ -145,18 +148,19 @@ func TestNotifyRequestLatencyEMAUpdate(t *testing.T) { if delta > 1*time.Microsecond { t.Fatalf("EMA mismatch: got %v, want %v", got, want) } - if samples := s.GetAllPeerStats()["peerA"].RequestSamples; samples != 2 { - t.Fatalf("expected RequestSamples=2, got %d", samples) + ps := s.GetAllPeerStats()["peerA"] + if ps.RequestSuccesses != 2 { + t.Fatalf("expected RequestSuccesses=2, got %d", ps.RequestSuccesses) } } -// TestNotifyRequestLatencySlowConvergence verifies the slow alpha +// TestNotifyRequestResultSlowConvergence verifies the slow alpha // damps convergence under sustained timeouts. -func TestNotifyRequestLatencySlowConvergence(t *testing.T) { +func TestNotifyRequestResultSlowConvergence(t *testing.T) { s := New() - s.NotifyRequestLatency("peerA", 100*time.Millisecond) + s.NotifyRequestResult("peerA", 100*time.Millisecond, false) for i := 0; i < 50; i++ { - s.NotifyRequestLatency("peerA", 5*time.Second) + s.NotifyRequestResult("peerA", 5*time.Second, false) } got := s.GetAllPeerStats()["peerA"].RequestLatencyEMA if got < 1*time.Second { @@ -171,7 +175,7 @@ func TestNotifyRequestLatencySlowConvergence(t *testing.T) { // from GetAllPeerStats. func TestNotifyPeerDropClearsStats(t *testing.T) { s := New() - s.NotifyRequestLatency("peerA", 200*time.Millisecond) + s.NotifyRequestResult("peerA", 200*time.Millisecond, false) s.NotifyPeerDrop("peerA") if _, ok := s.GetAllPeerStats()["peerA"]; ok { @@ -184,14 +188,14 @@ func TestNotifyPeerDropClearsStats(t *testing.T) { // dropper's MinLatencySamples=100 guard ensures this is harmless. func TestStaleRequestLatencyAfterDrop(t *testing.T) { s := New() - s.NotifyRequestLatency("peerA", 200*time.Millisecond) + s.NotifyRequestResult("peerA", 200*time.Millisecond, false) s.NotifyPeerDrop("peerA") // Late sample racing with the drop. - s.NotifyRequestLatency("peerA", 50*time.Millisecond) + s.NotifyRequestResult("peerA", 50*time.Millisecond, false) ps := s.GetAllPeerStats()["peerA"] - if ps.RequestSamples != 1 { - t.Fatalf("expected fresh RequestSamples=1, got %d", ps.RequestSamples) + if ps.RequestSuccesses != 1 { + t.Fatalf("expected fresh RequestSuccesses=1, got %d", ps.RequestSuccesses) } if ps.RequestLatencyEMA != 50*time.Millisecond { t.Fatalf("expected fresh bootstrap at 50ms, got %v", ps.RequestLatencyEMA) @@ -204,8 +208,8 @@ func TestStaleRequestLatencyAfterDrop(t *testing.T) { func TestMultiplePeersIsolated(t *testing.T) { s := New() s.NotifyBlock(map[string]int{"peerA": 5, "peerB": 0}, nil) - s.NotifyRequestLatency("peerA", 100*time.Millisecond) - s.NotifyRequestLatency("peerB", 5*time.Second) + s.NotifyRequestResult("peerA", 100*time.Millisecond, false) + s.NotifyRequestResult("peerB", 5*time.Second, false) s.NotifyBlock(nil, map[string]int{"peerA": 2}) stats := s.GetAllPeerStats() @@ -222,12 +226,12 @@ func TestMultiplePeersIsolated(t *testing.T) { } } -// TestLatencyTimestampSet verifies that NotifyRequestLatency stamps the +// TestLatencyTimestampSet verifies that NotifyRequestResult stamps the // peer's LastLatencySample with approximately time.Now(). func TestLatencyTimestampSet(t *testing.T) { s := New() before := time.Now() - s.NotifyRequestLatency("peerA", 100*time.Millisecond) + s.NotifyRequestResult("peerA", 100*time.Millisecond, false) after := time.Now() got := s.GetAllPeerStats()["peerA"].LastLatencySample @@ -237,18 +241,53 @@ func TestLatencyTimestampSet(t *testing.T) { } // TestLatencyTimestampUpdatesOnEachSample verifies that a later -// NotifyRequestLatency call advances LastLatencySample. +// NotifyRequestResult call advances LastLatencySample. func TestLatencyTimestampUpdatesOnEachSample(t *testing.T) { s := New() - s.NotifyRequestLatency("peerA", 100*time.Millisecond) + s.NotifyRequestResult("peerA", 100*time.Millisecond, false) first := s.GetAllPeerStats()["peerA"].LastLatencySample // Small sleep so the second timestamp is detectably later. time.Sleep(2 * time.Millisecond) - s.NotifyRequestLatency("peerA", 200*time.Millisecond) + s.NotifyRequestResult("peerA", 200*time.Millisecond, false) second := s.GetAllPeerStats()["peerA"].LastLatencySample if !second.After(first) { t.Fatalf("expected second sample timestamp > first, got first=%v second=%v", first, second) } } + +// TestRequestResultTimeoutCounting verifies that timeout=true increments +// RequestTimeouts (not RequestSuccesses) and still updates the EMA. +func TestRequestResultTimeoutCounting(t *testing.T) { + s := New() + s.NotifyRequestResult("peerA", 5*time.Second, true) + + ps := s.GetAllPeerStats()["peerA"] + if ps.RequestTimeouts != 1 { + t.Fatalf("expected RequestTimeouts=1, got %d", ps.RequestTimeouts) + } + if ps.RequestSuccesses != 0 { + t.Fatalf("expected RequestSuccesses=0, got %d", ps.RequestSuccesses) + } + if ps.RequestLatencyEMA != 5*time.Second { + t.Fatalf("EMA should bootstrap to timeout value, got %v", ps.RequestLatencyEMA) + } +} + +// TestRequestResultMixedCounting verifies that a mix of successes and +// timeouts increments the correct counters independently. +func TestRequestResultMixedCounting(t *testing.T) { + s := New() + s.NotifyRequestResult("peerA", 100*time.Millisecond, false) + s.NotifyRequestResult("peerA", 100*time.Millisecond, false) + s.NotifyRequestResult("peerA", 5*time.Second, true) + + ps := s.GetAllPeerStats()["peerA"] + if ps.RequestSuccesses != 2 { + t.Fatalf("expected RequestSuccesses=2, got %d", ps.RequestSuccesses) + } + if ps.RequestTimeouts != 1 { + t.Fatalf("expected RequestTimeouts=1, got %d", ps.RequestTimeouts) + } +} From e8083ed0f7ac0a6ef32646410645677f6661000f Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Mon, 20 Apr 2026 10:01:49 +0200 Subject: [PATCH 38/38] eth: fix lint issues after rebase MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three files had goimports drift from resolving rebase conflicts (eth/dropper_test.go, eth/fetcher/tx_fetcher.go, eth/handler.go) — re-run goimports. Also remove an unused mockConsumer.count() helper in eth/txtracker/tracker_test.go that no test calls. The method was left in during the peerstats split and never needed. --- eth/dropper_test.go | 18 +++++------ eth/fetcher/tx_fetcher.go | 56 +++++++++++++++++------------------ eth/handler.go | 2 +- eth/txtracker/tracker_test.go | 6 ---- 4 files changed, 38 insertions(+), 44 deletions(-) diff --git a/eth/dropper_test.go b/eth/dropper_test.go index c56f293166..67f24a064d 100644 --- a/eth/dropper_test.go +++ b/eth/dropper_test.go @@ -243,17 +243,17 @@ func TestProtectedByPoolRequestLatencyBasic(t *testing.T) { // Three peers have enough samples; the two fastest should win. stats[dialed[0].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 50 * time.Millisecond, - RequestSuccesses: peerstats.MinLatencySamples, + RequestSuccesses: peerstats.MinLatencySamples, LastLatencySample: time.Now(), } stats[dialed[1].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 100 * time.Millisecond, - RequestSuccesses: peerstats.MinLatencySamples, + RequestSuccesses: peerstats.MinLatencySamples, LastLatencySample: time.Now(), } stats[dialed[2].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 2 * time.Second, - RequestSuccesses: peerstats.MinLatencySamples, + RequestSuccesses: peerstats.MinLatencySamples, LastLatencySample: time.Now(), } @@ -282,12 +282,12 @@ func TestProtectedByPoolRequestLatencyBootstrapGuard(t *testing.T) { // A lucky-fast peer with only 1 sample — must NOT be protected. stats[dialed[0].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 1 * time.Millisecond, - RequestSuccesses: 1, + RequestSuccesses: 1, } // A warmed-up but slower peer — should be protected on latency. stats[dialed[1].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 500 * time.Millisecond, - RequestSuccesses: peerstats.MinLatencySamples, + RequestSuccesses: peerstats.MinLatencySamples, LastLatencySample: time.Now(), } @@ -317,7 +317,7 @@ func TestProtectedByPoolRequestLatencyPerPool(t *testing.T) { for _, p := range inbound { stats[p.ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 50 * time.Millisecond, - RequestSuccesses: peerstats.MinLatencySamples, + RequestSuccesses: peerstats.MinLatencySamples, LastLatencySample: time.Now(), } } @@ -326,7 +326,7 @@ func TestProtectedByPoolRequestLatencyPerPool(t *testing.T) { for _, p := range dialed { stats[p.ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 1 * time.Second, - RequestSuccesses: peerstats.MinLatencySamples, + RequestSuccesses: peerstats.MinLatencySamples, LastLatencySample: time.Now(), } } @@ -356,14 +356,14 @@ func TestProtectedByPoolRequestLatencyStale(t *testing.T) { // Fresh, fast peer — should be protected. stats[dialed[0].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 50 * time.Millisecond, - RequestSuccesses: peerstats.MinLatencySamples, + RequestSuccesses: peerstats.MinLatencySamples, LastLatencySample: time.Now(), } // Stale, fast peer — was fast, but hasn't answered in too long. // Same EMA and sample count as the fresh peer; only staleness differs. stats[dialed[1].ID().String()] = peerstats.PeerStats{ RequestLatencyEMA: 50 * time.Millisecond, - RequestSuccesses: peerstats.MinLatencySamples, + RequestSuccesses: peerstats.MinLatencySamples, LastLatencySample: time.Now().Add(-2 * peerstats.MaxLatencyStaleness), } diff --git a/eth/fetcher/tx_fetcher.go b/eth/fetcher/tx_fetcher.go index 6aba3b1f23..b5d0e954e1 100644 --- a/eth/fetcher/tx_fetcher.go +++ b/eth/fetcher/tx_fetcher.go @@ -180,11 +180,11 @@ type TxFetcher struct { alternates map[common.Hash]map[string]struct{} // In-flight transaction alternate origins if retrieval fails // Callbacks - validateMeta func(common.Hash, byte) error // Validate a tx metadata based on the local txpool - addTxs func([]*types.Transaction) []error // Insert a batch of transactions into local txpool - fetchTxs func(string, []common.Hash) error // Retrieves a set of txs from a remote peer - dropPeer func(string) // Drops a peer in case of announcement violation - onAccepted func(peer string, hashes []common.Hash) // Optional: notified with accepted tx hashes per peer + validateMeta func(common.Hash, byte) error // Validate a tx metadata based on the local txpool + addTxs func([]*types.Transaction) []error // Insert a batch of transactions into local txpool + fetchTxs func(string, []common.Hash) error // Retrieves a set of txs from a remote peer + dropPeer func(string) // Drops a peer in case of announcement violation + onAccepted func(peer string, hashes []common.Hash) // Optional: notified with accepted tx hashes per peer onRequestResult func(peer string, latency time.Duration, timeout bool) // Optional: notified once per completed/timed-out tx request step chan struct{} // Notification channel when the fetcher loop iterates @@ -207,30 +207,30 @@ func NewTxFetcherForTests( chain *core.BlockChain, validateMeta func(common.Hash, byte) error, addTxs func([]*types.Transaction) []error, fetchTxs func(string, []common.Hash) error, dropPeer func(string), onAccepted func(string, []common.Hash), onRequestResult func(string, time.Duration, bool), clock mclock.Clock, realTime func() time.Time, rand *mrand.Rand) *TxFetcher { return &TxFetcher{ - notify: make(chan *txAnnounce), - cleanup: make(chan *txDelivery), - drop: make(chan *txDrop), - quit: make(chan struct{}), - waitlist: make(map[common.Hash]map[string]struct{}), - waittime: make(map[common.Hash]mclock.AbsTime), - waitslots: make(map[string]map[common.Hash]*txMetadataWithSeq), - announces: make(map[string]map[common.Hash]*txMetadataWithSeq), - announced: make(map[common.Hash]map[string]struct{}), - fetching: make(map[common.Hash]string), - requests: make(map[string]*txRequest), - alternates: make(map[common.Hash]map[string]struct{}), - underpriced: lru.NewCache[common.Hash, time.Time](maxTxUnderpricedSetSize), - txOnChainCache: lru.NewCache[common.Hash, struct{}](txOnChainCacheLimit), - chain: chain, - validateMeta: validateMeta, - addTxs: addTxs, - fetchTxs: fetchTxs, - dropPeer: dropPeer, - onAccepted: onAccepted, + notify: make(chan *txAnnounce), + cleanup: make(chan *txDelivery), + drop: make(chan *txDrop), + quit: make(chan struct{}), + waitlist: make(map[common.Hash]map[string]struct{}), + waittime: make(map[common.Hash]mclock.AbsTime), + waitslots: make(map[string]map[common.Hash]*txMetadataWithSeq), + announces: make(map[string]map[common.Hash]*txMetadataWithSeq), + announced: make(map[common.Hash]map[string]struct{}), + fetching: make(map[common.Hash]string), + requests: make(map[string]*txRequest), + alternates: make(map[common.Hash]map[string]struct{}), + underpriced: lru.NewCache[common.Hash, time.Time](maxTxUnderpricedSetSize), + txOnChainCache: lru.NewCache[common.Hash, struct{}](txOnChainCacheLimit), + chain: chain, + validateMeta: validateMeta, + addTxs: addTxs, + fetchTxs: fetchTxs, + dropPeer: dropPeer, + onAccepted: onAccepted, onRequestResult: onRequestResult, - clock: clock, - realTime: realTime, - rand: rand, + clock: clock, + realTime: realTime, + rand: rand, } } diff --git a/eth/handler.go b/eth/handler.go index 448cfe0a2a..11b8e79f0f 100644 --- a/eth/handler.go +++ b/eth/handler.go @@ -36,9 +36,9 @@ import ( "github.com/ethereum/go-ethereum/eth/downloader" "github.com/ethereum/go-ethereum/eth/ethconfig" "github.com/ethereum/go-ethereum/eth/fetcher" + "github.com/ethereum/go-ethereum/eth/peerstats" "github.com/ethereum/go-ethereum/eth/protocols/eth" "github.com/ethereum/go-ethereum/eth/protocols/snap" - "github.com/ethereum/go-ethereum/eth/peerstats" "github.com/ethereum/go-ethereum/eth/txtracker" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/event" diff --git a/eth/txtracker/tracker_test.go b/eth/txtracker/tracker_test.go index ad0637eb0a..16adb5b92c 100644 --- a/eth/txtracker/tracker_test.go +++ b/eth/txtracker/tracker_test.go @@ -175,12 +175,6 @@ func (c *mockConsumer) last() signal { return c.signals[len(c.signals)-1] } -func (c *mockConsumer) count() int { - c.mu.Lock() - defer c.mu.Unlock() - return len(c.signals) -} - // waitStep blocks until the tracker has processed one event. func waitStep(t *testing.T, tr *Tracker) { t.Helper()