diff --git a/core/state/snapshot/disklayer.go b/core/state/snapshot/disklayer.go index 202e6c70ed..87421de7e7 100644 --- a/core/state/snapshot/disklayer.go +++ b/core/state/snapshot/disklayer.go @@ -41,6 +41,7 @@ type diskLayer struct { genMarker []byte // Marker for the state that's indexed during initial layer generation genPending chan struct{} // Notification channel when generation is done (test synchronicity) genAbort chan chan *generatorStats // Notification channel to abort generating the snapshot in this layer + genStop sync.Once // Guards stopGeneration so it can be called multiple times safely lock sync.RWMutex } @@ -184,17 +185,24 @@ func (dl *diskLayer) Update(blockHash common.Hash, accounts map[common.Hash][]by return newDiffLayer(dl, blockHash, accounts, storage) } -// stopGeneration aborts the state snapshot generation if it is currently running. +// stopGeneration aborts the state snapshot generation if it is currently +// running. It is safe to call multiple times: only the first call dispatches +// an abort signal to the generator; subsequent calls are no-ops. Tree.Disable +// and Tree.Rebuild both invoke this on every disk layer, so a layer that is +// reached through both paths must not deadlock on the second send to the +// unbuffered genAbort channel. func (dl *diskLayer) stopGeneration() { - dl.lock.RLock() - generating := dl.genMarker != nil - dl.lock.RUnlock() - if !generating { - return - } - if dl.genAbort != nil { - abort := make(chan *generatorStats) - dl.genAbort <- abort - <-abort - } + dl.genStop.Do(func() { + dl.lock.RLock() + generating := dl.genMarker != nil + dl.lock.RUnlock() + if !generating { + return + } + if dl.genAbort != nil { + abort := make(chan *generatorStats) + dl.genAbort <- abort + <-abort + } + }) } diff --git a/core/state/snapshot/disklayer_test.go b/core/state/snapshot/disklayer_test.go index 6d99a90d61..560755336b 100644 --- a/core/state/snapshot/disklayer_test.go +++ b/core/state/snapshot/disklayer_test.go @@ -19,6 +19,7 @@ package snapshot import ( "bytes" "testing" + "time" "github.com/VictoriaMetrics/fastcache" "github.com/ethereum/go-ethereum/common" @@ -586,3 +587,53 @@ func TestDiskSeek(t *testing.T) { } } } + +// TestStopGenerationIdempotent verifies that stopGeneration can be invoked +// multiple times on the same disk layer without deadlocking. Tree.Disable and +// Tree.Rebuild both walk every disk layer and call this, so a layer that is +// reachable from both paths must not hang on the second call's send to the +// unbuffered genAbort channel after the generator goroutine has already exited. +// Regression test for issue #33233. +func TestStopGenerationIdempotent(t *testing.T) { + t.Parallel() + + abortCh := make(chan chan *generatorStats) + dl := &diskLayer{ + diskdb: rawdb.NewMemoryDatabase(), + genMarker: []byte{}, // non-nil signals generation in progress + genAbort: abortCh, + } + + // Stand in for the generator goroutine: receive on genAbort exactly once, + // then exit. A second send by stopGeneration would deadlock the test. + generatorExited := make(chan struct{}) + go func() { + defer close(generatorExited) + ack := <-abortCh + ack <- &generatorStats{} + }() + + // First call drives the abort handshake. + dl.stopGeneration() + + // Generator must have observed the first abort and exited. + select { + case <-generatorExited: + case <-time.After(5 * time.Second): + t.Fatal("first stopGeneration never delivered abort to generator") + } + + // Second call must return immediately. Run in a goroutine so the test + // fails with a clear message on regression instead of hanging until the + // outer test timeout fires. + done := make(chan struct{}) + go func() { + defer close(done) + dl.stopGeneration() + }() + select { + case <-done: + case <-time.After(5 * time.Second): + t.Fatal("second stopGeneration deadlocked sending to genAbort after the generator had exited") + } +} diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go index f0f6296433..7940ca995f 100644 --- a/core/state/snapshot/snapshot.go +++ b/core/state/snapshot/snapshot.go @@ -258,8 +258,6 @@ func (t *Tree) Disable() { for _, layer := range t.layers { switch layer := layer.(type) { case *diskLayer: - // TODO this function will hang if it's called twice. Will - // fix it in the following PRs. layer.stopGeneration() layer.markStale() layer.Release() @@ -702,8 +700,6 @@ func (t *Tree) Rebuild(root common.Hash) { for _, layer := range t.layers { switch layer := layer.(type) { case *diskLayer: - // TODO this function will hang if it's called twice. Will - // fix it in the following PRs. layer.stopGeneration() layer.markStale() layer.Release()