p2p/discover: decouple nodeFeed from Table mutex in waitForNodes (#34898)
Fixes #34881

This fixes a hang in `Table.waitForNodes`. It is a replacement for PRs #34890 and #33665, which tried to fix the same issue in different ways:

- #34890 doesn't really fix the issue, it just makes it less likely
- #33665 tries to fix it by moving the feed send outside of the lock

I created this PR because I want to keep the synchronous node feed sending in `Table.nodeAdded`.

Co-authored-by: Csaba Kiraly <csaba.kiraly@gmail.com>
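The hang is a lock-vs-blocking-send cycle: `Table.nodeAdded` sends on `nodeFeed` synchronously while holding `Table.mutex`, and `event.Feed.Send` blocks until every subscribed channel has accepted the value. The old `waitForNodes` received from its subscription on the same goroutine that takes `Table.mutex`, so each side could end up waiting on the other. Below is a minimal standalone sketch of that pattern; all names (`registry`, `add`, `wait`) are made up, not go-ethereum code, and only the real `event.Feed` API is used:

```go
package main

import (
	"sync"

	"github.com/ethereum/go-ethereum/event"
)

// registry stands in for Table: a mutex-guarded count plus an event.Feed.
type registry struct {
	mu   sync.Mutex
	feed event.Feed
	n    int
}

// add mirrors nodeAdded: it sends on the feed while holding the mutex.
func (r *registry) add() {
	r.mu.Lock()
	defer r.mu.Unlock()
	r.n++
	r.feed.Send(r.n) // blocks until every subscriber has received
}

// wait mirrors the old waitForNodes: the goroutine receiving from the
// subscription is the same one that takes r.mu in its loop.
func (r *registry) wait(min int) {
	ch := make(chan int) // unbuffered, like the old code
	sub := r.feed.Subscribe(ch)
	defer sub.Unsubscribe()
	for {
		r.mu.Lock() // (1) may park here while add() holds the mutex...
		ok := r.n >= min
		r.mu.Unlock()
		if ok {
			return
		}
		<-ch // (2) ...while add()'s Send waits for this receive.
	}
}

func main() {
	r := new(registry)
	go func() {
		for i := 0; i < 1000; i++ {
			r.add()
		}
	}()
	// Deadlocks as soon as add() wins the race for r.mu while wait() is
	// parked at (1): Send cannot finish, so the mutex is never released.
	r.wait(1000)
}
```

Run with `go run`, this typically dies with Go's "all goroutines are asleep" deadlock error within a few iterations.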
This commit is contained in:
parent 1abbae239d
commit 281dc4c209

2 changed files with 83 additions and 12 deletions
p2p/discover/table.go

@@ -753,6 +753,41 @@ func (tab *Table) deleteNode(n *enode.Node) {
 
 // waitForNodes blocks until the table contains at least n nodes.
 func (tab *Table) waitForNodes(ctx context.Context, n int) error {
+	// Wrap ctx so the forwarder goroutine exits when waitForNodes returns,
+	// regardless of whether the caller's ctx is canceled.
+	ctx, cancel := context.WithCancel(ctx)
+	defer cancel()
+
+	// Set up a notification channel that gets unblocked when there was any activity on
+	// the table. Ultimately this reads from the table's nodeFeed, but can't use the feed
+	// directly on the same goroutine that takes Table.mutex, it would deadlock.
+	var notify chan struct{}
+	var notifyErr error
+	initsub := func() event.Subscription {
+		notify = make(chan struct{}, 1)
+		newnode := make(chan *enode.Node, 1)
+		sub := tab.nodeFeed.Subscribe(newnode)
+		go func() {
+			defer close(notify)
+			for {
+				select {
+				case <-newnode:
+					select {
+					case notify <- struct{}{}:
+					default:
+					}
+				case <-ctx.Done():
+					notifyErr = ctx.Err()
+					return
+				case <-tab.closeReq:
+					notifyErr = errClosed
+					return
+				}
+			}
+		}()
+		return sub
+	}
+
 	getlength := func() (count int) {
 		for _, b := range &tab.buckets {
 			count += len(b.entries)
@@ -760,28 +795,24 @@ func (tab *Table) waitForNodes(ctx context.Context, n int) error {
 		return count
 	}
 
-	var ch chan *enode.Node
 	for {
 		tab.mutex.Lock()
 		if getlength() >= n {
 			tab.mutex.Unlock()
 			return nil
 		}
-		if ch == nil {
-			// Init subscription.
-			ch = make(chan *enode.Node)
-			sub := tab.nodeFeed.Subscribe(ch)
+		if notify == nil {
+			// Lazily init the subscription. Do this while holding the
+			// lock so we don't miss any events that change the node count.
+			sub := initsub()
 			defer sub.Unsubscribe()
 		}
 		tab.mutex.Unlock()
 
-		// Wait for a node add event.
-		select {
-		case <-ch:
-		case <-ctx.Done():
-			return ctx.Err()
-		case <-tab.closeReq:
-			return errClosed
-		}
+		// Wait for table event.
+		if _, ok := <-notify; !ok {
+			break
+		}
 	}
+	return notifyErr
 }
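The key change is the forwarder goroutine created by `initsub`: it is the only reader of the subscription and never touches `Table.mutex`, so the synchronous `nodeFeed.Send` in `nodeAdded` can always complete. The same pattern, distilled into a standalone program (simplified, hypothetical names; only the real `event.Feed` API is used):

```go
package main

import (
	"fmt"

	"github.com/ethereum/go-ethereum/event"
)

func main() {
	var feed event.Feed
	events := make(chan int, 1)      // subscription channel
	notify := make(chan struct{}, 1) // at most one pending wakeup
	done := make(chan struct{})

	sub := feed.Subscribe(events)
	defer sub.Unsubscribe()

	// Forwarder: the only receiver on the feed. It takes no locks, so
	// feed.Send can always complete, even when called from inside a
	// critical section elsewhere.
	go func() {
		for {
			select {
			case <-events:
				select {
				case notify <- struct{}{}: // coalesce bursts into one wakeup
				default: // a wakeup is already pending
				}
			case <-done:
				return
			}
		}
	}()

	for i := 0; i < 3; i++ {
		feed.Send(i) // never parks waiting for the consumer's lock
	}
	<-notify // the waiting side observes at least one wakeup
	fmt.Println("woken up")
	close(done)
}
```

The one-slot buffers are what make this safe: the producer can always hand off an event, and a burst of sends collapses into a single pending wakeup rather than a queue the waiter must drain.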
p2p/discover/table_test.go

@@ -17,6 +17,7 @@
 package discover
 
 import (
+	"context"
 	"crypto/ecdsa"
 	"fmt"
 	"math/rand"
@@ -550,6 +551,45 @@ func TestSetFallbackNodes_DNSHostname(t *testing.T) {
 	t.Logf("resolved localhost to %v", resolved.IPAddr())
 }
 
+// This test checks that waitForNodes does not block addFoundNode.
+// See https://github.com/ethereum/go-ethereum/issues/34881.
+func TestTable_waitForNodesLocking(t *testing.T) {
+	transport := newPingRecorder()
+	tab, db := newTestTable(transport, Config{})
+	defer db.Close()
+	defer tab.close()
+	<-tab.initDone
+
+	// waitForNodes will never reach this count, so it stays subscribed
+	// to nodeFeed and looping for the duration of the test.
+	waitCtx, cancelWait := context.WithCancel(context.Background())
+	defer cancelWait()
+	waitDone := make(chan struct{})
+	go func() {
+		defer close(waitDone)
+		tab.waitForNodes(waitCtx, 1<<20)
+	}()
+
+	// Call addFoundNode in loop to send to the feed.
+	addDone := make(chan struct{})
+	go func() {
+		defer close(addDone)
+		for i := range 10000 {
+			d := 240 + (i % 17)
+			n := nodeAtDistance(tab.self().ID(), d, intIP(i))
+			tab.addFoundNode(n, true)
+		}
+	}()
+
+	select {
+	case <-addDone:
+		cancelWait()
+		<-waitDone
+	case <-time.After(10 * time.Second):
+		t.Fatal("deadlock detected: add loop did not finish within 10s")
+	}
+}
+
 func newkey() *ecdsa.PrivateKey {
 	key, err := crypto.GenerateKey()
 	if err != nil {
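For orientation, a call site of the revised function might look like the helper below. `waitForNodes` is unexported, so callers live inside package discover; the helper name, node count, and timeout here are illustrative, not from this commit, and `context` and `time` are assumed imported:

```go
// Hypothetical helper inside package discover; illustrative only, not
// part of this commit.
func waitUntilPopulated(tab *Table) error {
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	// nil once the table holds at least 16 nodes; ctx.Err() on timeout
	// or cancellation; errClosed if the table shuts down while waiting.
	return tab.waitForNodes(ctx, 16)
}
```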