mirror of
https://github.com/ethereum/go-ethereum.git
synced 2026-05-24 08:49:29 +00:00
Two related changes to CrawlIterator: (1) Add a file-level commentary block explaining why the iterator uses a FIFO queue (BFS over the FINDNODE-response graph) and what it is *not* suitable for (target-directed lookup -- use RandomNodes() / the alpha=3 lookup iterator for that). The choice was inherited from dcrawl.nim without explicit reasoning; making it visible avoids future readers re-deriving the survey-vs-lookup distinction. The BFS rationale is two-fold: - Coverage: BFS reaches every peer within N hops of the seeds in order, so a time-bounded run produces a representative sample of the reachable graph rather than a deep tendril through one sub-region. - Adversarial resilience: a peer returning malicious "neighbour" claims, dead-end peers, or eclipse-style sub-graphs cannot monopolise the worker pool, because pending work from other branches sits ahead of the attacker's responses in the queue. DFS would amplify each of these attacks. (2) Add a RandomWorkers field to CrawlOptions. Of the Workers-sized worker pool, the first (Workers - RandomWorkers) workers pop the FIFO front (BFS), while RandomWorkers workers pop a uniform-random queue index via swap-and-pop (O(1)). Total worker count is unchanged. Default RandomWorkers = Workers / 4 (4 of 16 with the default parallelism). At this ratio: - Cold-start cost is negligible: 12 of 16 workers still drain FIFO, so the first ~1s of a fresh crawl behaves like pure BFS. - 25% of pops break strict FIFO ordering, providing a mild anti-fingerprint defence against an attacker who could otherwise predict our processing order from the contents of their own FINDNODE responses. Operators can override per-run via the new --random-workers CLI flag on `devp2p discv4 crawl` and `discv5 crawl`. Negative value forces pure BFS; positive value selects an explicit count. 
The new TestCrawlIteratorRandomWorkers test covers four pop-policy configurations (all-fifo, all-random, half-half, default) and asserts that, in each configuration, the iterator still terminates and emits each node exactly once.
174 lines
4.4 KiB
Go
174 lines
4.4 KiB
Go
// Copyright 2020 The go-ethereum Authors
|
|
// This file is part of go-ethereum.
|
|
//
|
|
// go-ethereum is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// go-ethereum is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with go-ethereum. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
package main
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"slices"
|
|
"time"
|
|
|
|
"github.com/ethereum/go-ethereum/cmd/devp2p/internal/v5test"
|
|
"github.com/ethereum/go-ethereum/common"
|
|
"github.com/ethereum/go-ethereum/p2p/discover"
|
|
"github.com/ethereum/go-ethereum/p2p/enode"
|
|
"github.com/urfave/cli/v2"
|
|
)
|
|
|
|
var (
|
|
discv5Command = &cli.Command{
|
|
Name: "discv5",
|
|
Usage: "Node Discovery v5 tools",
|
|
Subcommands: []*cli.Command{
|
|
discv5PingCommand,
|
|
discv5ResolveCommand,
|
|
discv5CrawlCommand,
|
|
discv5TestCommand,
|
|
discv5ListenCommand,
|
|
},
|
|
}
|
|
discv5PingCommand = &cli.Command{
|
|
Name: "ping",
|
|
Usage: "Sends ping to a node",
|
|
Action: discv5Ping,
|
|
Flags: discoveryNodeFlags,
|
|
}
|
|
discv5ResolveCommand = &cli.Command{
|
|
Name: "resolve",
|
|
Usage: "Finds a node in the DHT",
|
|
Action: discv5Resolve,
|
|
Flags: discoveryNodeFlags,
|
|
}
|
|
discv5CrawlCommand = &cli.Command{
|
|
Name: "crawl",
|
|
Usage: "Updates a nodes.json file with random nodes found in the DHT",
|
|
Action: discv5Crawl,
|
|
Flags: slices.Concat(discoveryNodeFlags, []cli.Flag{
|
|
crawlTimeoutFlag,
|
|
crawlParallelismFlag,
|
|
crawlModeFlag,
|
|
crawlRandomWorkersFlag,
|
|
}),
|
|
}
|
|
discv5TestCommand = &cli.Command{
|
|
Name: "test",
|
|
Usage: "Runs protocol tests against a node",
|
|
Action: discv5Test,
|
|
Flags: []cli.Flag{
|
|
testPatternFlag,
|
|
testTAPFlag,
|
|
testListen1Flag,
|
|
testListen2Flag,
|
|
},
|
|
}
|
|
discv5ListenCommand = &cli.Command{
|
|
Name: "listen",
|
|
Usage: "Runs a node",
|
|
Action: discv5Listen,
|
|
Flags: discoveryNodeFlags,
|
|
}
|
|
)
|
|
|
|
func discv5Ping(ctx *cli.Context) error {
|
|
n := getNodeArg(ctx)
|
|
disc, _ := startV5(ctx)
|
|
defer disc.Close()
|
|
|
|
_, err := disc.Ping(n)
|
|
fmt.Println(err)
|
|
return nil
|
|
}
|
|
|
|
func discv5Resolve(ctx *cli.Context) error {
|
|
n := getNodeArg(ctx)
|
|
disc, _ := startV5(ctx)
|
|
defer disc.Close()
|
|
|
|
fmt.Println(disc.Resolve(n))
|
|
return nil
|
|
}
|
|
|
|
func discv5Crawl(ctx *cli.Context) error {
|
|
if ctx.NArg() < 1 {
|
|
return errors.New("need nodes file as argument")
|
|
}
|
|
nodesFile := ctx.Args().First()
|
|
inputSet := make(nodeSet)
|
|
if common.FileExist(nodesFile) {
|
|
inputSet = loadNodesJSON(nodesFile)
|
|
}
|
|
|
|
disc, config := startV5(ctx)
|
|
defer disc.Close()
|
|
|
|
iter, err := newDiscv5CrawlIterator(disc, config.Bootnodes, ctx.String(crawlModeFlag.Name), ctx.Int(crawlParallelismFlag.Name), ctx.Int(crawlRandomWorkersFlag.Name))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
c, err := newCrawler(inputSet, config.Bootnodes, disc, iter)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
c.revalidateInterval = 10 * time.Minute
|
|
output := c.run(ctx.Duration(crawlTimeoutFlag.Name), ctx.Int(crawlParallelismFlag.Name))
|
|
writeNodesJSON(nodesFile, output)
|
|
return nil
|
|
}
|
|
|
|
func newDiscv5CrawlIterator(disc *discover.UDPv5, bootnodes []*enode.Node, mode string, parallel, randomWorkers int) (enode.Iterator, error) {
|
|
switch mode {
|
|
case "", "lookup":
|
|
return disc.RandomNodes(), nil
|
|
case "fast":
|
|
return disc.CrawlIterator(discover.CrawlOptions{
|
|
Workers: parallel,
|
|
RandomWorkers: randomWorkers,
|
|
Seeds: bootnodes,
|
|
}), nil
|
|
default:
|
|
return nil, fmt.Errorf("unknown -%s value %q (want 'lookup' or 'fast')", crawlModeFlag.Name, mode)
|
|
}
|
|
}
|
|
|
|
// discv5Test runs the protocol test suite.
|
|
func discv5Test(ctx *cli.Context) error {
|
|
suite := &v5test.Suite{
|
|
Dest: getNodeArg(ctx),
|
|
Listen1: ctx.String(testListen1Flag.Name),
|
|
Listen2: ctx.String(testListen2Flag.Name),
|
|
}
|
|
return runTests(ctx, suite.AllTests())
|
|
}
|
|
|
|
func discv5Listen(ctx *cli.Context) error {
|
|
disc, _ := startV5(ctx)
|
|
defer disc.Close()
|
|
|
|
fmt.Println(disc.Self())
|
|
select {}
|
|
}
|
|
|
|
// startV5 starts an ephemeral discovery v5 node.
|
|
func startV5(ctx *cli.Context) (*discover.UDPv5, discover.Config) {
|
|
ln, config := makeDiscoveryConfig(ctx)
|
|
socket := listen(ctx, ln)
|
|
disc, err := discover.ListenV5(socket, ln, config)
|
|
if err != nil {
|
|
exit(err)
|
|
}
|
|
return disc, config
|
|
}
|