go-ethereum/cmd/devp2p/discv5cmd.go
Csaba Kiraly 33785aab21
p2p/discover: document BFS choice, add RandomWorkers split
Two related changes to CrawlIterator:

(1) Add a file-level commentary block explaining why the iterator uses a
FIFO queue (BFS over the FINDNODE-response graph) and what it is *not*
suitable for (target-directed lookup -- use RandomNodes() / the alpha=3
lookup iterator for that). The choice was inherited from dcrawl.nim
without explicit reasoning; making it visible avoids future readers
re-deriving the survey-vs-lookup distinction.

The BFS rationale is two-fold:

 - Coverage: BFS reaches every peer within N hops of the seeds in
   order, so a time-bounded run produces a representative sample of the
   reachable graph rather than a deep tendril through one sub-region.
 - Adversarial resilience: a peer returning malicious "neighbour"
   claims, dead-end peers, or eclipse-style sub-graphs cannot
   monopolise the worker pool, because pending work from other branches
   sits ahead of the attacker's responses in the queue. DFS would
   amplify each of these attacks.

(2) Add a RandomWorkers field to CrawlOptions. Of the Workers-sized
worker pool, the first (Workers - RandomWorkers) workers pop the FIFO
front (BFS), while RandomWorkers workers pop a uniform-random queue
index via swap-and-pop (O(1)). Total worker count is unchanged.

Default RandomWorkers = Workers / 4 (4 of 16 with the default
parallelism). At this ratio:

 - Cold-start cost is negligible: 12 of 16 workers still drain FIFO,
   so the first ~1s of a fresh crawl behaves like pure BFS.
 - 25% of pops break strict FIFO ordering, providing a mild
   anti-fingerprint defence against an attacker who could otherwise
   predict our processing order from the contents of their own
   FINDNODE responses.

Operators can override per-run via the new --random-workers CLI flag
on `devp2p discv4 crawl` and `discv5 crawl`. Negative value forces
pure BFS; positive value selects an explicit count.

The new TestCrawlIteratorRandomWorkers covers four pop-policy
configurations (all-fifo, all-random, half-half, default) and
asserts the iterator still terminates and emits each node exactly
once in each.
2026-05-07 14:41:58 +02:00

174 lines
4.4 KiB
Go

// Copyright 2020 The go-ethereum Authors
// This file is part of go-ethereum.
//
// go-ethereum is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// go-ethereum is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with go-ethereum. If not, see <http://www.gnu.org/licenses/>.
package main
import (
"errors"
"fmt"
"slices"
"time"
"github.com/ethereum/go-ethereum/cmd/devp2p/internal/v5test"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/p2p/discover"
"github.com/ethereum/go-ethereum/p2p/enode"
"github.com/urfave/cli/v2"
)
var (
discv5Command = &cli.Command{
Name: "discv5",
Usage: "Node Discovery v5 tools",
Subcommands: []*cli.Command{
discv5PingCommand,
discv5ResolveCommand,
discv5CrawlCommand,
discv5TestCommand,
discv5ListenCommand,
},
}
discv5PingCommand = &cli.Command{
Name: "ping",
Usage: "Sends ping to a node",
Action: discv5Ping,
Flags: discoveryNodeFlags,
}
discv5ResolveCommand = &cli.Command{
Name: "resolve",
Usage: "Finds a node in the DHT",
Action: discv5Resolve,
Flags: discoveryNodeFlags,
}
discv5CrawlCommand = &cli.Command{
Name: "crawl",
Usage: "Updates a nodes.json file with random nodes found in the DHT",
Action: discv5Crawl,
Flags: slices.Concat(discoveryNodeFlags, []cli.Flag{
crawlTimeoutFlag,
crawlParallelismFlag,
crawlModeFlag,
crawlRandomWorkersFlag,
}),
}
discv5TestCommand = &cli.Command{
Name: "test",
Usage: "Runs protocol tests against a node",
Action: discv5Test,
Flags: []cli.Flag{
testPatternFlag,
testTAPFlag,
testListen1Flag,
testListen2Flag,
},
}
discv5ListenCommand = &cli.Command{
Name: "listen",
Usage: "Runs a node",
Action: discv5Listen,
Flags: discoveryNodeFlags,
}
)
func discv5Ping(ctx *cli.Context) error {
n := getNodeArg(ctx)
disc, _ := startV5(ctx)
defer disc.Close()
_, err := disc.Ping(n)
fmt.Println(err)
return nil
}
func discv5Resolve(ctx *cli.Context) error {
n := getNodeArg(ctx)
disc, _ := startV5(ctx)
defer disc.Close()
fmt.Println(disc.Resolve(n))
return nil
}
func discv5Crawl(ctx *cli.Context) error {
if ctx.NArg() < 1 {
return errors.New("need nodes file as argument")
}
nodesFile := ctx.Args().First()
inputSet := make(nodeSet)
if common.FileExist(nodesFile) {
inputSet = loadNodesJSON(nodesFile)
}
disc, config := startV5(ctx)
defer disc.Close()
iter, err := newDiscv5CrawlIterator(disc, config.Bootnodes, ctx.String(crawlModeFlag.Name), ctx.Int(crawlParallelismFlag.Name), ctx.Int(crawlRandomWorkersFlag.Name))
if err != nil {
return err
}
c, err := newCrawler(inputSet, config.Bootnodes, disc, iter)
if err != nil {
return err
}
c.revalidateInterval = 10 * time.Minute
output := c.run(ctx.Duration(crawlTimeoutFlag.Name), ctx.Int(crawlParallelismFlag.Name))
writeNodesJSON(nodesFile, output)
return nil
}
func newDiscv5CrawlIterator(disc *discover.UDPv5, bootnodes []*enode.Node, mode string, parallel, randomWorkers int) (enode.Iterator, error) {
switch mode {
case "", "lookup":
return disc.RandomNodes(), nil
case "fast":
return disc.CrawlIterator(discover.CrawlOptions{
Workers: parallel,
RandomWorkers: randomWorkers,
Seeds: bootnodes,
}), nil
default:
return nil, fmt.Errorf("unknown -%s value %q (want 'lookup' or 'fast')", crawlModeFlag.Name, mode)
}
}
// discv5Test runs the protocol test suite.
func discv5Test(ctx *cli.Context) error {
suite := &v5test.Suite{
Dest: getNodeArg(ctx),
Listen1: ctx.String(testListen1Flag.Name),
Listen2: ctx.String(testListen2Flag.Name),
}
return runTests(ctx, suite.AllTests())
}
func discv5Listen(ctx *cli.Context) error {
disc, _ := startV5(ctx)
defer disc.Close()
fmt.Println(disc.Self())
select {}
}
// startV5 starts an ephemeral discovery v5 node.
func startV5(ctx *cli.Context) (*discover.UDPv5, discover.Config) {
ln, config := makeDiscoveryConfig(ctx)
socket := listen(ctx, ln)
disc, err := discover.ListenV5(socket, ln, config)
if err != nil {
exit(err)
}
return disc, config
}