mirror of
https://github.com/ethereum/go-ethereum.git
synced 2026-05-24 16:59:26 +00:00
core/state/snapshot: make diskLayer.stopGeneration idempotent
Fixes the deadlock noted by the "TODO this function will hang if it's called twice" markers in Tree.Disable and Tree.Rebuild. Both paths walk every layer in the tree and unconditionally invoke stopGeneration on each disk layer, so a layer reachable through both must tolerate a second call. The previous implementation sent on the unbuffered genAbort channel whenever genMarker was non-nil. After the first abort handshake the generator goroutine exits, but genMarker is only cleared on the successful-completion path (generate.go), not on the aborted-mid-flight path. A second stopGeneration therefore saw generating=true, sent on genAbort with no receiver, and blocked forever. Wrap the abort handshake in a sync.Once on the disk layer. The first call drives the handshake exactly as before; subsequent calls are no-ops. Remove both TODO comments now that the contract is honoured. Adds TestStopGenerationIdempotent: stands in a mock generator goroutine that consumes a single abort, calls stopGeneration twice, and fails with an explicit message at a 5s deadline rather than hanging until the test runner's outer timeout. Fixes #33233.
This commit is contained in:
parent
21c5a287f9
commit
963d78dfcc
3 changed files with 71 additions and 16 deletions
|
|
@ -41,6 +41,7 @@ type diskLayer struct {
|
|||
genMarker []byte // Marker for the state that's indexed during initial layer generation
|
||||
genPending chan struct{} // Notification channel when generation is done (test synchronicity)
|
||||
genAbort chan chan *generatorStats // Notification channel to abort generating the snapshot in this layer
|
||||
genStop sync.Once // Guards stopGeneration so it can be called multiple times safely
|
||||
|
||||
lock sync.RWMutex
|
||||
}
|
||||
|
|
@ -184,17 +185,24 @@ func (dl *diskLayer) Update(blockHash common.Hash, accounts map[common.Hash][]by
|
|||
return newDiffLayer(dl, blockHash, accounts, storage)
|
||||
}
|
||||
|
||||
// stopGeneration aborts the state snapshot generation if it is currently running.
|
||||
// stopGeneration aborts the state snapshot generation if it is currently
|
||||
// running. It is safe to call multiple times: only the first call dispatches
|
||||
// an abort signal to the generator; subsequent calls are no-ops. Tree.Disable
|
||||
// and Tree.Rebuild both invoke this on every disk layer, so a layer that is
|
||||
// reached through both paths must not deadlock on the second send to the
|
||||
// unbuffered genAbort channel.
|
||||
func (dl *diskLayer) stopGeneration() {
|
||||
dl.lock.RLock()
|
||||
generating := dl.genMarker != nil
|
||||
dl.lock.RUnlock()
|
||||
if !generating {
|
||||
return
|
||||
}
|
||||
if dl.genAbort != nil {
|
||||
abort := make(chan *generatorStats)
|
||||
dl.genAbort <- abort
|
||||
<-abort
|
||||
}
|
||||
dl.genStop.Do(func() {
|
||||
dl.lock.RLock()
|
||||
generating := dl.genMarker != nil
|
||||
dl.lock.RUnlock()
|
||||
if !generating {
|
||||
return
|
||||
}
|
||||
if dl.genAbort != nil {
|
||||
abort := make(chan *generatorStats)
|
||||
dl.genAbort <- abort
|
||||
<-abort
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ package snapshot
|
|||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/fastcache"
|
||||
"github.com/ethereum/go-ethereum/common"
|
||||
|
|
@ -586,3 +587,53 @@ func TestDiskSeek(t *testing.T) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestStopGenerationIdempotent verifies that stopGeneration can be invoked
|
||||
// multiple times on the same disk layer without deadlocking. Tree.Disable and
|
||||
// Tree.Rebuild both walk every disk layer and call this, so a layer that is
|
||||
// reachable from both paths must not hang on the second call's send to the
|
||||
// unbuffered genAbort channel after the generator goroutine has already exited.
|
||||
// Regression test for issue #33233.
|
||||
func TestStopGenerationIdempotent(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
abortCh := make(chan chan *generatorStats)
|
||||
dl := &diskLayer{
|
||||
diskdb: rawdb.NewMemoryDatabase(),
|
||||
genMarker: []byte{}, // non-nil signals generation in progress
|
||||
genAbort: abortCh,
|
||||
}
|
||||
|
||||
// Stand in for the generator goroutine: receive on genAbort exactly once,
|
||||
// then exit. A second send by stopGeneration would deadlock the test.
|
||||
generatorExited := make(chan struct{})
|
||||
go func() {
|
||||
defer close(generatorExited)
|
||||
ack := <-abortCh
|
||||
ack <- &generatorStats{}
|
||||
}()
|
||||
|
||||
// First call drives the abort handshake.
|
||||
dl.stopGeneration()
|
||||
|
||||
// Generator must have observed the first abort and exited.
|
||||
select {
|
||||
case <-generatorExited:
|
||||
case <-time.After(5 * time.Second):
|
||||
t.Fatal("first stopGeneration never delivered abort to generator")
|
||||
}
|
||||
|
||||
// Second call must return immediately. Run in a goroutine so the test
|
||||
// fails with a clear message on regression instead of hanging until the
|
||||
// outer test timeout fires.
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
defer close(done)
|
||||
dl.stopGeneration()
|
||||
}()
|
||||
select {
|
||||
case <-done:
|
||||
case <-time.After(5 * time.Second):
|
||||
t.Fatal("second stopGeneration deadlocked sending to genAbort after the generator had exited")
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -258,8 +258,6 @@ func (t *Tree) Disable() {
|
|||
for _, layer := range t.layers {
|
||||
switch layer := layer.(type) {
|
||||
case *diskLayer:
|
||||
// TODO this function will hang if it's called twice. Will
|
||||
// fix it in the following PRs.
|
||||
layer.stopGeneration()
|
||||
layer.markStale()
|
||||
layer.Release()
|
||||
|
|
@ -702,8 +700,6 @@ func (t *Tree) Rebuild(root common.Hash) {
|
|||
for _, layer := range t.layers {
|
||||
switch layer := layer.(type) {
|
||||
case *diskLayer:
|
||||
// TODO this function will hang if it's called twice. Will
|
||||
// fix it in the following PRs.
|
||||
layer.stopGeneration()
|
||||
layer.markStale()
|
||||
layer.Release()
|
||||
|
|
|
|||
Loading…
Reference in a new issue