From b026ef6bb7fa9b59fda3640d8e09784a6193fa24 Mon Sep 17 00:00:00 2001
From: Csaba Kiraly
Date: Thu, 7 May 2026 08:56:23 +0200
Subject: [PATCH] p2p/discover: drop discv4 prefix-bit grind from CrawlIterator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The original CrawlIterator on the discv4 path generated FINDNODE targets
by grinding random pubkeys until their Keccak256 had a specific
top-N-bit prefix matching a per-call rotation index, then sending them.
The aim was to anchor each peer's response to a different /16 region of
the global keyspace.

Empirically (3 x 5-minute runs against mainnet bootnodes):

  mode           total mean ± std   mainnet mean ± std
  fast (grind)   5714 ± 117         549 ± 33
  fast-random    5306 ± 366         521 ± 124

Means are within 1σ of each other. The grind's only measurable benefit
is reduced run-to-run variance, not higher yield. For long-running
curated crawls (the production use case for cmd/devp2p) the variance
amortises away, so the simplification is worth taking.

Replace the grind with a plain crand.Read on the v4 target, drop the
randomTargetWithPrefix helper, log2Pow2 helper, and the v4-side
prefix-bit math from withDefaults. Drange becomes a v5-only knob and
its doc is updated to say so; the power-of-two requirement is gone.

discv5 is unchanged: it uses native distance rotation, not target
hashes, and was never affected by the grind.
---
 p2p/discover/crawliter.go | 72 ++++++++-------------------------------
 1 file changed, 14 insertions(+), 58 deletions(-)

diff --git a/p2p/discover/crawliter.go b/p2p/discover/crawliter.go
index a099eed30b..8a7fc5c92b 100644
--- a/p2p/discover/crawliter.go
+++ b/p2p/discover/crawliter.go
@@ -21,7 +21,6 @@ import (
 	"sync"
 	"sync/atomic"
 
-	"github.com/ethereum/go-ethereum/crypto"
 	"github.com/ethereum/go-ethereum/p2p/discover/v4wire"
 	"github.com/ethereum/go-ethereum/p2p/enode"
 )
@@ -36,11 +35,13 @@ type CrawlOptions struct {
 	// terminates immediately.
Callers should pass at least the bootnodes. Seeds []*enode.Node - // Drange is the number of keyspace sub-regions to rotate the FINDNODE - // target through. Defaults to 16. Internally rounded up to the next power - // of two and capped at 256 (the keyspace top-byte width); on the discv4 - // path this caps the prefix-bit grind cost, on the discv5 path it caps - // the rotation to valid distances [1, 256]. + // Drange is the number of FINDNODE rotation slots per peer. Has effect + // only on the discv5 path, where each rotation slot d maps to the + // distance value 256-d (so Drange=16 covers distances 256, 255, ..., 241). + // On the discv4 path it has no effect: targets are random NodeIDs and + // the rotation counter is unused. + // + // Defaults to 16. Capped at 256. Drange int // OutputCap bounds the number of newly-discovered peers buffered for the @@ -69,43 +70,27 @@ func (o *CrawlOptions) withDefaults() { if o.Drange > 256 { o.Drange = 256 } - // Round up to the next power of two so the prefix-bit width is - // well-defined and Drange divides the top byte of the keyspace evenly. - if o.Drange&(o.Drange-1) != 0 { - o.Drange = nextPowerOfTwo(o.Drange) - } if o.OutputCap <= 0 { o.OutputCap = 16 * o.Workers } } -// nextPowerOfTwo returns the smallest power of two >= n, for n in [1, 256]. -func nextPowerOfTwo(n int) int { - p := 1 - for p < n { - p <<= 1 - } - return p -} - // CrawlIterator returns an enode.Iterator that performs a breadth-first -// crawl by issuing a single FINDNODE request per discovered peer, rotating -// the request's target through Drange sub-regions of the keyspace so that -// each peer is asked about a different slice. Compared to RandomNodes, this -// avoids the alpha-bounded Kademlia lookup convergence loop and is the right -// shape for breadth crawls (e.g. devp2p discv4 crawl). +// crawl by issuing a single FINDNODE request per discovered peer, with a +// fresh random target each call. 
Compared to RandomNodes, this avoids the +// alpha-bounded Kademlia lookup convergence loop and is the right shape +// for breadth crawls (e.g. devp2p discv4 crawl). // // Concurrency is bounded by opts.Workers; pacing is RTT-driven, not // rate-limited. func (t *UDPv4) CrawlIterator(opts CrawlOptions) enode.Iterator { - opts.withDefaults() - prefixBits := log2Pow2(opts.Drange) - queryFn := func(dst *enode.Node, d int) ([]*enode.Node, error) { + queryFn := func(dst *enode.Node, _ int) ([]*enode.Node, error) { addr, ok := dst.UDPEndpoint() if !ok { return nil, errNoUDPEndpoint } - target := randomTargetWithPrefix(uint8(d), prefixBits) + var target v4wire.Pubkey + crand.Read(target[:]) peers, err := t.findnode(dst.ID(), addr, target) if err != nil { t.log.Trace("FINDNODE failed", "id", dst.ID(), "err", err) @@ -131,35 +116,6 @@ func (t *UDPv5) CrawlIterator(opts CrawlOptions) enode.Iterator { return newCrawlIterator(opts, queryFn) } -// log2Pow2 returns log2(n) for power-of-two n. The caller must ensure n is a -// power of two; non-power-of-two inputs round down. -func log2Pow2(n int) int { - bits := 0 - for n > 1 { - n >>= 1 - bits++ - } - return bits -} - -// randomTargetWithPrefix returns a v4wire.Pubkey whose Keccak256 hash has its -// top `bits` bits equal to d. On average ~2^bits draws are needed. -func randomTargetWithPrefix(d uint8, bits int) v4wire.Pubkey { - if bits == 0 { - var pk v4wire.Pubkey - crand.Read(pk[:]) - return pk - } - for { - var pk v4wire.Pubkey - crand.Read(pk[:]) - h := crypto.Keccak256(pk[:]) - if (h[0] >> (8 - bits)) == d { - return pk - } - } -} - // crawlIterator is a breadth-first FINDNODE-driven iterator. It maintains a // shared work queue and an output buffer; workers pop from the queue, issue // one FINDNODE per pop, and feed any newly-seen peers back into both the