From 6d048460cec580f0b3a9f12fff8d5033c7c12ef4 Mon Sep 17 00:00:00 2001 From: Jonathan Oppenheimer Date: Wed, 7 Jan 2026 03:16:50 -0500 Subject: [PATCH] refactor: track routine running instead --- core/state/snapshot/disklayer.go | 16 ++++++++++------ core/state/snapshot/generate.go | 3 +++ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/core/state/snapshot/disklayer.go b/core/state/snapshot/disklayer.go index 0b9c66dbe8..dcc2b73ad5 100644 --- a/core/state/snapshot/disklayer.go +++ b/core/state/snapshot/disklayer.go @@ -19,6 +19,7 @@ package snapshot import ( "bytes" "sync" + "sync/atomic" "time" "github.com/VictoriaMetrics/fastcache" @@ -43,6 +44,7 @@ type diskLayer struct { genMarker []byte // Marker for the state that's indexed during initial layer generation genPending chan struct{} // Notification channel when generation is done (test synchronicity) genAbort chan chan *generatorStats // Notification channel to abort generating the snapshot in this layer + genRunning atomic.Bool // Tracks whether the generator goroutine is actually running lock sync.RWMutex } @@ -192,10 +194,14 @@ func (dl *diskLayer) Update(blockHash common.Hash, accounts map[common.Hash][]by // stopGeneration aborts the state snapshot generation if it is currently running. func (dl *diskLayer) stopGeneration() { - // Check if generation goroutine is running by checking if genAbort channel exists. - // Note: genMarker can be nil even when the generator is still running (waiting - // for abort signal after completing generation), so we check genAbort instead. + // Check if generation goroutine is actually running // + // Note: genMarker can be nil even when the generator is still running (waiting + // for abort signal after completing generation), so we can't rely on genMarker. + if !dl.genRunning.Load() { + return + } + // Use write lock to ensure only one goroutine can stop generation at a time, // preventing a race where multiple callers might try to send abort signals. dl.lock.Lock() @@ -210,14 +216,12 @@ func (dl *diskLayer) stopGeneration() { dl.lock.Unlock() // Perform the channel handshake without holding the lock to avoid deadlocks. - // Use a timeout to handle cases where the generator goroutine may have exited - // unexpectedly (e.g., due to panic or other runtime errors). abort := make(chan *generatorStats) select { case genAbort <- abort: // Generator received the abort signal, wait for it to respond <-abort case <-time.After(5 * time.Second): - log.Warn("Snapshot generator did not respond to stop signal, it may have crashed") + log.Error("Snapshot generator did not respond despite being marked as running") } } diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 01fb55ea4c..a40d7ff711 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -648,6 +648,9 @@ func generateAccounts(ctx *generatorContext, dl *diskLayer, accMarker []byte) er // gathering and logging, since the method surfs the blocks as they arrive, often // being restarted. func (dl *diskLayer) generate(stats *generatorStats) { + dl.genRunning.Store(true) + defer dl.genRunning.Store(false) + var ( accMarker []byte abort chan *generatorStats