core: fix sync reset in pruned nodes (#31638)

This is an attempt at fixing #31601. I think what happens is the startup
logic will try to get the full block body (it's `bc.loadLastState`) and
fail because genesis block has been pruned from the freezer. This will
cause it to keep repeating the reset logic, causing a deadlock.

This can happen when due to an unsuccessful sync we don't have the state
for the head (or any other state) fully, and try to redo the snap sync.

---------

Co-authored-by: Gary Rong <garyrong0905@gmail.com>
This commit is contained in:
Sina M 2025-04-17 10:32:40 +02:00 committed by GitHub
parent cb21177aa8
commit e444823394
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 60 additions and 23 deletions

View file

@ -514,19 +514,33 @@ func (bc *BlockChain) loadLastState() error {
log.Warn("Empty database, resetting chain")
return bc.Reset()
}
// Make sure the entire head block is available
headBlock := bc.GetBlockByHash(head)
headHeader := bc.GetHeaderByHash(head)
if headHeader == nil {
// Corrupt or empty database, init from scratch
log.Warn("Head header missing, resetting chain", "hash", head)
return bc.Reset()
}
var headBlock *types.Block
if cmp := headHeader.Number.Cmp(new(big.Int)); cmp == 1 {
// Make sure the entire head block is available.
headBlock = bc.GetBlockByHash(head)
} else if cmp == 0 {
// On a pruned node the block body might not be available. But a pruned
// block should never be the head block. The only exception is when, as
// a last resort, chain is reset to genesis.
headBlock = bc.genesisBlock
}
if headBlock == nil {
// Corrupt or empty database, init from scratch
log.Warn("Head block missing, resetting chain", "hash", head)
return bc.Reset()
}
// Everything seems to be fine, set as the head block
bc.currentBlock.Store(headBlock.Header())
bc.currentBlock.Store(headHeader)
headBlockGauge.Update(int64(headBlock.NumberU64()))
// Restore the last known head header
headHeader := headBlock.Header()
if head := rawdb.ReadHeadHeaderHash(bc.db); head != (common.Hash{}) {
if header := bc.GetHeaderByHash(head); header != nil {
headHeader = header
@ -642,11 +656,15 @@ func (bc *BlockChain) SetHead(head uint64) error {
// Send chain head event to update the transaction pool
header := bc.CurrentBlock()
if block := bc.GetBlock(header.Hash(), header.Number.Uint64()); block == nil {
// This should never happen. In practice, previously currentBlock
// contained the entire block whereas now only a "marker", so there
// is an ever so slight chance for a race we should handle.
log.Error("Current block not found in database", "block", header.Number, "hash", header.Hash())
return fmt.Errorf("current block missing: #%d [%x..]", header.Number, header.Hash().Bytes()[:4])
// In a pruned node the genesis block will not exist in the freezer.
// It should not happen that we set head to any other pruned block.
if header.Number.Uint64() > 0 {
// This should never happen. In practice, previously currentBlock
// contained the entire block whereas now only a "marker", so there
// is an ever so slight chance for a race we should handle.
log.Error("Current block not found in database", "block", header.Number, "hash", header.Hash())
return fmt.Errorf("current block missing: #%d [%x..]", header.Number, header.Hash().Bytes()[:4])
}
}
bc.chainHeadFeed.Send(ChainHeadEvent{Header: header})
return nil
@ -663,11 +681,15 @@ func (bc *BlockChain) SetHeadWithTimestamp(timestamp uint64) error {
// Send chain head event to update the transaction pool
header := bc.CurrentBlock()
if block := bc.GetBlock(header.Hash(), header.Number.Uint64()); block == nil {
// This should never happen. In practice, previously currentBlock
// contained the entire block whereas now only a "marker", so there
// is an ever so slight chance for a race we should handle.
log.Error("Current block not found in database", "block", header.Number, "hash", header.Hash())
return fmt.Errorf("current block missing: #%d [%x..]", header.Number, header.Hash().Bytes()[:4])
// In a pruned node the genesis block will not exist in the freezer.
// It should not happen that we set head to any other pruned block.
if header.Number.Uint64() > 0 {
// This should never happen. In practice, previously currentBlock
// contained the entire block whereas now only a "marker", so there
// is an ever so slight chance for a race we should handle.
log.Error("Current block not found in database", "block", header.Number, "hash", header.Hash())
return fmt.Errorf("current block missing: #%d [%x..]", header.Number, header.Hash().Bytes()[:4])
}
}
bc.chainHeadFeed.Send(ChainHeadEvent{Header: header})
return nil

View file

@ -196,6 +196,19 @@ func (indexer *txIndexer) repair(head uint64) {
}
}
// resolveHead resolves the block number of the current chain head.
func (indexer *txIndexer) resolveHead() uint64 {
headBlockHash := rawdb.ReadHeadBlockHash(indexer.db)
if headBlockHash == (common.Hash{}) {
return 0
}
headBlockNumber := rawdb.ReadHeaderNumber(indexer.db, headBlockHash)
if headBlockNumber == nil {
return 0
}
return *headBlockNumber
}
// loop is the scheduler of the indexer, assigning indexing/unindexing tasks depending
// on the received chain event.
func (indexer *txIndexer) loop(chain *BlockChain) {
@ -203,9 +216,9 @@ func (indexer *txIndexer) loop(chain *BlockChain) {
// Listening to chain events and manipulate the transaction indexes.
var (
stop chan struct{} // Non-nil if background routine is active
done chan struct{} // Non-nil if background routine is active
head = rawdb.ReadHeadBlock(indexer.db).NumberU64() // The latest announced chain head
stop chan struct{} // Non-nil if background routine is active
done chan struct{} // Non-nil if background routine is active
head = indexer.resolveHead() // The latest announced chain head
headCh = make(chan ChainHeadEvent)
sub = chain.SubscribeChainHeadEvent(headCh)

View file

@ -186,13 +186,15 @@ func (p *TxPool) loop(head *types.Header) {
// Try to inject a busy marker and start a reset if successful
select {
case resetBusy <- struct{}{}:
statedb, err := p.chain.StateAt(newHead.Root)
if err != nil {
log.Crit("Failed to reset txpool state", "err", err)
// Updates the statedb with the new chain head. The head state may be
// unavailable if the initial state sync has not yet completed.
if statedb, err := p.chain.StateAt(newHead.Root); err != nil {
log.Error("Failed to reset txpool state", "err", err)
} else {
p.stateLock.Lock()
p.state = statedb
p.stateLock.Unlock()
}
p.stateLock.Lock()
p.state = statedb
p.stateLock.Unlock()
// Busy marker injected, start a new subpool reset
go func(oldHead, newHead *types.Header) {