From e4448233940904e9c36f227b71f274c9df80a250 Mon Sep 17 00:00:00 2001 From: Sina M <1591639+s1na@users.noreply.github.com> Date: Thu, 17 Apr 2025 10:32:40 +0200 Subject: [PATCH] core: fix sync reset in pruned nodes (#31638) This is an attempt at fixing #31601. I think what happens is that the startup logic (in `bc.loadLastState`) tries to get the full block body and fails because the genesis block has been pruned from the freezer. This causes it to keep repeating the reset logic, causing a deadlock. This can happen when, due to an unsuccessful sync, we don't fully have the state for the head (or any other state) and try to redo the snap sync. --------- Co-authored-by: Gary Rong --- core/blockchain.go | 50 +++++++++++++++++++++++++++++++------------ core/txindexer.go | 19 +++++++++++++--- core/txpool/txpool.go | 14 ++++++------ 3 files changed, 60 insertions(+), 23 deletions(-) diff --git a/core/blockchain.go b/core/blockchain.go index 203dcd2693..c4caec66ed 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -514,19 +514,33 @@ func (bc *BlockChain) loadLastState() error { log.Warn("Empty database, resetting chain") return bc.Reset() } - // Make sure the entire head block is available - headBlock := bc.GetBlockByHash(head) + headHeader := bc.GetHeaderByHash(head) + if headHeader == nil { + // Corrupt or empty database, init from scratch + log.Warn("Head header missing, resetting chain", "hash", head) + return bc.Reset() + } + + var headBlock *types.Block + if cmp := headHeader.Number.Cmp(new(big.Int)); cmp == 1 { + // Make sure the entire head block is available. + headBlock = bc.GetBlockByHash(head) + } else if cmp == 0 { + // On a pruned node the block body might not be available. But a pruned + // block should never be the head block. The only exception is when, as + // a last resort, chain is reset to genesis.
+ headBlock = bc.genesisBlock + } if headBlock == nil { // Corrupt or empty database, init from scratch log.Warn("Head block missing, resetting chain", "hash", head) return bc.Reset() } // Everything seems to be fine, set as the head block - bc.currentBlock.Store(headBlock.Header()) + bc.currentBlock.Store(headHeader) headBlockGauge.Update(int64(headBlock.NumberU64())) // Restore the last known head header - headHeader := headBlock.Header() if head := rawdb.ReadHeadHeaderHash(bc.db); head != (common.Hash{}) { if header := bc.GetHeaderByHash(head); header != nil { headHeader = header @@ -642,11 +656,15 @@ func (bc *BlockChain) SetHead(head uint64) error { // Send chain head event to update the transaction pool header := bc.CurrentBlock() if block := bc.GetBlock(header.Hash(), header.Number.Uint64()); block == nil { - // This should never happen. In practice, previously currentBlock - // contained the entire block whereas now only a "marker", so there - // is an ever so slight chance for a race we should handle. - log.Error("Current block not found in database", "block", header.Number, "hash", header.Hash()) - return fmt.Errorf("current block missing: #%d [%x..]", header.Number, header.Hash().Bytes()[:4]) + // In a pruned node the genesis block will not exist in the freezer. + // It should not happen that we set head to any other pruned block. + if header.Number.Uint64() > 0 { + // This should never happen. In practice, previously currentBlock + // contained the entire block whereas now only a "marker", so there + // is an ever so slight chance for a race we should handle. 
+ log.Error("Current block not found in database", "block", header.Number, "hash", header.Hash()) + return fmt.Errorf("current block missing: #%d [%x..]", header.Number, header.Hash().Bytes()[:4]) + } } bc.chainHeadFeed.Send(ChainHeadEvent{Header: header}) return nil @@ -663,11 +681,15 @@ func (bc *BlockChain) SetHeadWithTimestamp(timestamp uint64) error { // Send chain head event to update the transaction pool header := bc.CurrentBlock() if block := bc.GetBlock(header.Hash(), header.Number.Uint64()); block == nil { - // This should never happen. In practice, previously currentBlock - // contained the entire block whereas now only a "marker", so there - // is an ever so slight chance for a race we should handle. - log.Error("Current block not found in database", "block", header.Number, "hash", header.Hash()) - return fmt.Errorf("current block missing: #%d [%x..]", header.Number, header.Hash().Bytes()[:4]) + // In a pruned node the genesis block will not exist in the freezer. + // It should not happen that we set head to any other pruned block. + if header.Number.Uint64() > 0 { + // This should never happen. In practice, previously currentBlock + // contained the entire block whereas now only a "marker", so there + // is an ever so slight chance for a race we should handle. + log.Error("Current block not found in database", "block", header.Number, "hash", header.Hash()) + return fmt.Errorf("current block missing: #%d [%x..]", header.Number, header.Hash().Bytes()[:4]) + } } bc.chainHeadFeed.Send(ChainHeadEvent{Header: header}) return nil diff --git a/core/txindexer.go b/core/txindexer.go index 31f069995b..64a2e8c49f 100644 --- a/core/txindexer.go +++ b/core/txindexer.go @@ -196,6 +196,19 @@ func (indexer *txIndexer) repair(head uint64) { } } +// resolveHead resolves the block number of the current chain head. 
+func (indexer *txIndexer) resolveHead() uint64 { + headBlockHash := rawdb.ReadHeadBlockHash(indexer.db) + if headBlockHash == (common.Hash{}) { + return 0 + } + headBlockNumber := rawdb.ReadHeaderNumber(indexer.db, headBlockHash) + if headBlockNumber == nil { + return 0 + } + return *headBlockNumber +} + // loop is the scheduler of the indexer, assigning indexing/unindexing tasks depending // on the received chain event. func (indexer *txIndexer) loop(chain *BlockChain) { @@ -203,9 +216,9 @@ func (indexer *txIndexer) loop(chain *BlockChain) { // Listening to chain events and manipulate the transaction indexes. var ( - stop chan struct{} // Non-nil if background routine is active - done chan struct{} // Non-nil if background routine is active - head = rawdb.ReadHeadBlock(indexer.db).NumberU64() // The latest announced chain head + stop chan struct{} // Non-nil if background routine is active + done chan struct{} // Non-nil if background routine is active + head = indexer.resolveHead() // The latest announced chain head headCh = make(chan ChainHeadEvent) sub = chain.SubscribeChainHeadEvent(headCh) diff --git a/core/txpool/txpool.go b/core/txpool/txpool.go index 2ed38772ce..fc4a7be6d2 100644 --- a/core/txpool/txpool.go +++ b/core/txpool/txpool.go @@ -186,13 +186,15 @@ func (p *TxPool) loop(head *types.Header) { // Try to inject a busy marker and start a reset if successful select { case resetBusy <- struct{}{}: - statedb, err := p.chain.StateAt(newHead.Root) - if err != nil { - log.Crit("Failed to reset txpool state", "err", err) + // Updates the statedb with the new chain head. The head state may be + // unavailable if the initial state sync has not yet completed. 
+ if statedb, err := p.chain.StateAt(newHead.Root); err != nil { + log.Error("Failed to reset txpool state", "err", err) + } else { + p.stateLock.Lock() + p.state = statedb + p.stateLock.Unlock() } - p.stateLock.Lock() - p.state = statedb - p.stateLock.Unlock() // Busy marker injected, start a new subpool reset go func(oldHead, newHead *types.Header) {