diff --git a/cmd/devp2p/discv4cmd.go b/cmd/devp2p/discv4cmd.go index 388e0c523a..945da3bc53 100644 --- a/cmd/devp2p/discv4cmd.go +++ b/cmd/devp2p/discv4cmd.go @@ -91,7 +91,7 @@ var ( Name: "crawl", Usage: "Updates a nodes.json file with random nodes found in the DHT", Action: discv4Crawl, - Flags: slices.Concat(discoveryNodeFlags, []cli.Flag{crawlTimeoutFlag, crawlParallelismFlag, crawlModeFlag}), + Flags: slices.Concat(discoveryNodeFlags, []cli.Flag{crawlTimeoutFlag, crawlParallelismFlag, crawlModeFlag, crawlRandomWorkersFlag}), } discv4TestCommand = &cli.Command{ Name: "test", @@ -143,6 +143,11 @@ var ( Usage: "Crawl iterator mode: 'lookup' (alpha-bounded Kademlia lookup) or 'fast' (one FINDNODE per peer with rotating prefix; sized by -parallel).", Value: "lookup", } + crawlRandomWorkersFlag = &cli.IntFlag{ + Name: "random-workers", + Usage: "Of the -parallel workers in -mode=fast, how many pop a random queue item rather than the FIFO front. 0 = library default (parallel/4); negative = pure BFS.", + Value: 0, + } remoteEnodeFlag = &cli.StringFlag{ Name: "remote", Usage: "Enode of the remote node under test", @@ -264,7 +269,7 @@ func discv4Crawl(ctx *cli.Context) error { disc, config := startV4(ctx) defer disc.Close() - iter, err := newDiscv4CrawlIterator(disc, config.Bootnodes, ctx.String(crawlModeFlag.Name), ctx.Int(crawlParallelismFlag.Name)) + iter, err := newDiscv4CrawlIterator(disc, config.Bootnodes, ctx.String(crawlModeFlag.Name), ctx.Int(crawlParallelismFlag.Name), ctx.Int(crawlRandomWorkersFlag.Name)) if err != nil { return err } @@ -278,14 +283,15 @@ func discv4Crawl(ctx *cli.Context) error { return nil } -func newDiscv4CrawlIterator(disc *discover.UDPv4, bootnodes []*enode.Node, mode string, parallel int) (enode.Iterator, error) { +func newDiscv4CrawlIterator(disc *discover.UDPv4, bootnodes []*enode.Node, mode string, parallel, randomWorkers int) (enode.Iterator, error) { switch mode { case "", "lookup": return disc.RandomNodes(), nil case "fast": return disc.CrawlIterator(discover.CrawlOptions{ - Workers: parallel, - Seeds: bootnodes, + Workers: parallel, + RandomWorkers: randomWorkers, + Seeds: bootnodes, }), nil default: return nil, fmt.Errorf("unknown -%s value %q (want 'lookup' or 'fast')", crawlModeFlag.Name, mode) diff --git a/cmd/devp2p/discv5cmd.go b/cmd/devp2p/discv5cmd.go index 7497cccc55..27237a6236 100644 --- a/cmd/devp2p/discv5cmd.go +++ b/cmd/devp2p/discv5cmd.go @@ -61,6 +61,7 @@ var ( crawlTimeoutFlag, crawlParallelismFlag, crawlModeFlag, + crawlRandomWorkersFlag, }), } discv5TestCommand = &cli.Command{ @@ -114,7 +115,7 @@ func discv5Crawl(ctx *cli.Context) error { disc, config := startV5(ctx) defer disc.Close() - iter, err := newDiscv5CrawlIterator(disc, config.Bootnodes, ctx.String(crawlModeFlag.Name), ctx.Int(crawlParallelismFlag.Name)) + iter, err := newDiscv5CrawlIterator(disc, config.Bootnodes, ctx.String(crawlModeFlag.Name), ctx.Int(crawlParallelismFlag.Name), ctx.Int(crawlRandomWorkersFlag.Name)) if err != nil { return err } @@ -128,14 +129,15 @@ func discv5Crawl(ctx *cli.Context) error { return nil } -func newDiscv5CrawlIterator(disc *discover.UDPv5, bootnodes []*enode.Node, mode string, parallel int) (enode.Iterator, error) { +func newDiscv5CrawlIterator(disc *discover.UDPv5, bootnodes []*enode.Node, mode string, parallel, randomWorkers int) (enode.Iterator, error) { switch mode { case "", "lookup": return disc.RandomNodes(), nil case "fast": return disc.CrawlIterator(discover.CrawlOptions{ - Workers: parallel, - Seeds: bootnodes, + Workers: parallel, + RandomWorkers: randomWorkers, + Seeds: bootnodes, }), nil default: return nil, fmt.Errorf("unknown -%s value %q (want 'lookup' or 'fast')", crawlModeFlag.Name, mode) diff --git a/p2p/discover/crawliter.go b/p2p/discover/crawliter.go index 8a7fc5c92b..9be1e75271 100644 --- a/p2p/discover/crawliter.go +++ b/p2p/discover/crawliter.go @@ -18,6 +18,7 @@ package discover import ( crand "crypto/rand" + "math/rand/v2" "sync" "sync/atomic" @@ -25,6 +26,37 @@ import ( "github.com/ethereum/go-ethereum/p2p/enode" ) +// CrawlIterator performs a breadth-first crawl of the discv4/discv5 +// FINDNODE-response graph. Each worker pops a peer from the work queue, +// issues one FINDNODE call against it, and feeds any newly-seen peers +// back into the queue (and the iterator's output buffer). +// +// BFS (FIFO queue) was chosen over DFS or target-directed lookup for two +// reasons that align with the iterator's intended use case (survey-style +// crawls — devp2p crawl, geth dial-candidate discovery): +// +// - Coverage: BFS reaches every peer within N hops of the seeds in +// order, so a time-bounded run produces a representative sample of +// the reachable graph rather than a deep tendril through one +// sub-region. +// - Adversarial resilience: a peer returning malicious "neighbour" +// claims, dead-end peers, or eclipse-style sub-graphs cannot +// monopolise the worker pool, because pending work from other +// branches sits ahead of the attacker's responses in the queue. +// DFS would amplify each of these attacks. +// +// CrawlIterator is NOT a target-directed search. To find a specific node +// (or the K closest to a known target), use the lookup-based iterator +// returned by [UDPv4.RandomNodes] / [UDPv5.RandomNodes] instead — those +// run an alpha-bounded Kademlia lookup, which is the right shape for +// "find peer X" but the wrong shape for "survey the network". +// +// Pop policy can be tuned via [CrawlOptions.RandomWorkers]: any subset +// of the worker pool can be configured to pop a uniform-random queue +// item instead of the FIFO front, breaking strict ordering as a mild +// anti-fingerprint defence. The remaining workers pop FIFO and preserve +// the BFS character. See the field doc for details. + // CrawlOptions configures a CrawlIterator. type CrawlOptions struct { // Workers is the number of concurrent FINDNODE calls in flight. @@ -58,6 +90,23 @@ type CrawlOptions struct { // once across a long crawl. Realistic bound is the reachable DHT size // (~1M peers, ~50 MB). OutputCap int + + // RandomWorkers is the number of workers in the pool that pop a uniform- + // random queue item instead of the FIFO front. The remaining + // Workers - RandomWorkers workers behave as today, popping front. Total + // worker count is unchanged at Workers; only the BFS-vs-random split + // differs. + // + // Zero (the struct zero-value) selects the library default of + // Workers/4 rounded down. Pass a negative value (e.g. -1) to force + // pure BFS with zero random workers. A positive value > Workers is + // clamped to Workers (i.e. fully-random pop with no FIFO workers). + // + // The default Workers/4 mix preserves the BFS coverage character (cold + // start is dominated by the FIFO majority) while breaking strict FIFO + // ordering, a mild anti-fingerprint defence. See the file-level comment + // for details. + RandomWorkers int } func (o *CrawlOptions) withDefaults() { @@ -73,6 +122,17 @@ func (o *CrawlOptions) withDefaults() { if o.OutputCap <= 0 { o.OutputCap = 16 * o.Workers } + switch { + case o.RandomWorkers < 0: + // Negative means: explicit pure BFS, no random workers. + o.RandomWorkers = 0 + case o.RandomWorkers == 0: + // Zero (struct zero-value) means: library default. + o.RandomWorkers = o.Workers / 4 + case o.RandomWorkers > o.Workers: + // Clamp; can't have more random workers than total workers. + o.RandomWorkers = o.Workers + } } // CrawlIterator returns an enode.Iterator that performs a breadth-first @@ -166,10 +226,17 @@ func newCrawlIterator(opts CrawlOptions, queryFn func(*enode.Node, int) ([]*enod it.inflight++ } - // Workers. - for i := 0; i < opts.Workers; i++ { + // Workers. The first (Workers - RandomWorkers) workers pop FIFO; the + // remaining RandomWorkers pop a uniform-random queue index. Both share + // the same queue, mutex, cond, and termination semantics. + fifoCount := opts.Workers - opts.RandomWorkers + for i := 0; i < fifoCount; i++ { it.wg.Add(1) - go it.worker() + go it.worker(false) + } + for i := 0; i < opts.RandomWorkers; i++ { + it.wg.Add(1) + go it.worker(true) } return it } @@ -207,8 +274,10 @@ func (it *crawlIterator) discover(n *enode.Node) { } // popWork blocks until either a peer is available to query, or the iterator -// has nothing left to do. Returns (nil, false) on termination. -func (it *crawlIterator) popWork() (*enode.Node, bool) { +// has nothing left to do. Returns (nil, false) on termination. If random is +// true, pops a uniform-random index via swap-and-pop; otherwise pops the +// FIFO front. Both modes share the same termination semantics. +func (it *crawlIterator) popWork(random bool) (*enode.Node, bool) { it.mu.Lock() defer it.mu.Unlock() for { @@ -216,6 +285,14 @@ func (it *crawlIterator) popWork() (*enode.Node, bool) { return nil, false } if len(it.queue) > 0 { + if random { + i := rand.IntN(len(it.queue)) + n := it.queue[i] + // Swap-and-pop: O(1) removal from middle. + it.queue[i] = it.queue[len(it.queue)-1] + it.queue = it.queue[:len(it.queue)-1] + return n, true + } n := it.queue[0] it.queue = it.queue[1:] return n, true @@ -243,10 +320,10 @@ func (it *crawlIterator) finishWork() { } } -func (it *crawlIterator) worker() { +func (it *crawlIterator) worker(random bool) { defer it.wg.Done() for { - n, ok := it.popWork() + n, ok := it.popWork(random) if !ok { return } diff --git a/p2p/discover/crawliter_test.go b/p2p/discover/crawliter_test.go index 42a4b1b9ca..f646dc01bb 100644 --- a/p2p/discover/crawliter_test.go +++ b/p2p/discover/crawliter_test.go @@ -223,6 +223,69 @@ func TestCrawlIteratorOutputCap(t *testing.T) { } } +// TestCrawlIteratorRandomWorkers verifies that with a mixed FIFO + random +// worker pool, the iterator still terminates and emits each node exactly +// once on the synthetic graph from TestCrawlIteratorTerminates. +func TestCrawlIteratorRandomWorkers(t *testing.T) { + for _, tc := range []struct { + name string + workers int + randomWorkers int + }{ + {"all-fifo", 4, -1}, // -1 → pure BFS + {"all-random", 4, 4}, // all workers random-pop + {"half-half", 8, 4}, // 4 FIFO + 4 random + {"default", 16, 0}, // 0 → library default = 4 random of 16 + } { + t.Run(tc.name, func(t *testing.T) { + nodes := makeTestNodes(t, 50) + neighbours := make(map[enode.ID][]*enode.Node, len(nodes)) + for i, n := range nodes { + var ns []*enode.Node + for k := 1; k <= 5; k++ { + ns = append(ns, nodes[(i+k)%len(nodes)]) + } + neighbours[n.ID()] = ns + } + var calls atomic.Int64 + queryFn := func(dst *enode.Node, _ int) ([]*enode.Node, error) { + calls.Add(1) + return neighbours[dst.ID()], nil + } + it := newCrawlIterator(CrawlOptions{ + Workers: tc.workers, + RandomWorkers: tc.randomWorkers, + Seeds: []*enode.Node{nodes[0]}, + Drange: 16, + }, queryFn) + seen := make(map[enode.ID]int) + done := make(chan struct{}) + go func() { + for it.Next() { + seen[it.Node().ID()]++ + } + close(done) + }() + select { + case <-done: + case <-time.After(5 * time.Second): + t.Fatal("iterator did not terminate within 5s") + } + if got := len(seen); got != len(nodes) { + t.Fatalf("emitted %d distinct nodes, want %d", got, len(nodes)) + } + for id, c := range seen { + if c != 1 { + t.Errorf("node %x emitted %d times, want 1", id[:4], c) + } + } + if got, want := calls.Load(), int64(len(nodes)); got != want { + t.Errorf("queryFn invoked %d times, want %d", got, want) + } + }) + } +} + // TestCrawlIteratorRotation verifies that the d argument passed to queryFn // rotates through 0..Drange-1. func TestCrawlIteratorRotation(t *testing.T) {