mirror of
https://github.com/ethereum/go-ethereum.git
synced 2026-02-26 15:47:21 +00:00
core: fix sync reset in pruned nodes (#31638)
This is an attempt at fixing #31601. I think what happens is that the startup logic (in `bc.loadLastState`) tries to get the full block body and fails because the genesis block has been pruned from the freezer. This causes it to keep repeating the reset logic, resulting in a deadlock. This can happen when, due to an unsuccessful sync, we do not fully have the state for the head (or any other state) and try to redo the snap sync. --------- Co-authored-by: Gary Rong <garyrong0905@gmail.com>
This commit is contained in:
parent
cb21177aa8
commit
e444823394
3 changed files with 60 additions and 23 deletions
|
|
@ -514,19 +514,33 @@ func (bc *BlockChain) loadLastState() error {
|
|||
log.Warn("Empty database, resetting chain")
|
||||
return bc.Reset()
|
||||
}
|
||||
// Make sure the entire head block is available
|
||||
headBlock := bc.GetBlockByHash(head)
|
||||
headHeader := bc.GetHeaderByHash(head)
|
||||
if headHeader == nil {
|
||||
// Corrupt or empty database, init from scratch
|
||||
log.Warn("Head header missing, resetting chain", "hash", head)
|
||||
return bc.Reset()
|
||||
}
|
||||
|
||||
var headBlock *types.Block
|
||||
if cmp := headHeader.Number.Cmp(new(big.Int)); cmp == 1 {
|
||||
// Make sure the entire head block is available.
|
||||
headBlock = bc.GetBlockByHash(head)
|
||||
} else if cmp == 0 {
|
||||
// On a pruned node the block body might not be available. But a pruned
|
||||
// block should never be the head block. The only exception is when, as
|
||||
// a last resort, chain is reset to genesis.
|
||||
headBlock = bc.genesisBlock
|
||||
}
|
||||
if headBlock == nil {
|
||||
// Corrupt or empty database, init from scratch
|
||||
log.Warn("Head block missing, resetting chain", "hash", head)
|
||||
return bc.Reset()
|
||||
}
|
||||
// Everything seems to be fine, set as the head block
|
||||
bc.currentBlock.Store(headBlock.Header())
|
||||
bc.currentBlock.Store(headHeader)
|
||||
headBlockGauge.Update(int64(headBlock.NumberU64()))
|
||||
|
||||
// Restore the last known head header
|
||||
headHeader := headBlock.Header()
|
||||
if head := rawdb.ReadHeadHeaderHash(bc.db); head != (common.Hash{}) {
|
||||
if header := bc.GetHeaderByHash(head); header != nil {
|
||||
headHeader = header
|
||||
|
|
@ -642,11 +656,15 @@ func (bc *BlockChain) SetHead(head uint64) error {
|
|||
// Send chain head event to update the transaction pool
|
||||
header := bc.CurrentBlock()
|
||||
if block := bc.GetBlock(header.Hash(), header.Number.Uint64()); block == nil {
|
||||
// This should never happen. In practice, previously currentBlock
|
||||
// contained the entire block whereas now only a "marker", so there
|
||||
// is an ever so slight chance for a race we should handle.
|
||||
log.Error("Current block not found in database", "block", header.Number, "hash", header.Hash())
|
||||
return fmt.Errorf("current block missing: #%d [%x..]", header.Number, header.Hash().Bytes()[:4])
|
||||
// In a pruned node the genesis block will not exist in the freezer.
|
||||
// It should not happen that we set head to any other pruned block.
|
||||
if header.Number.Uint64() > 0 {
|
||||
// This should never happen. In practice, previously currentBlock
|
||||
// contained the entire block whereas now only a "marker", so there
|
||||
// is an ever so slight chance for a race we should handle.
|
||||
log.Error("Current block not found in database", "block", header.Number, "hash", header.Hash())
|
||||
return fmt.Errorf("current block missing: #%d [%x..]", header.Number, header.Hash().Bytes()[:4])
|
||||
}
|
||||
}
|
||||
bc.chainHeadFeed.Send(ChainHeadEvent{Header: header})
|
||||
return nil
|
||||
|
|
@ -663,11 +681,15 @@ func (bc *BlockChain) SetHeadWithTimestamp(timestamp uint64) error {
|
|||
// Send chain head event to update the transaction pool
|
||||
header := bc.CurrentBlock()
|
||||
if block := bc.GetBlock(header.Hash(), header.Number.Uint64()); block == nil {
|
||||
// This should never happen. In practice, previously currentBlock
|
||||
// contained the entire block whereas now only a "marker", so there
|
||||
// is an ever so slight chance for a race we should handle.
|
||||
log.Error("Current block not found in database", "block", header.Number, "hash", header.Hash())
|
||||
return fmt.Errorf("current block missing: #%d [%x..]", header.Number, header.Hash().Bytes()[:4])
|
||||
// In a pruned node the genesis block will not exist in the freezer.
|
||||
// It should not happen that we set head to any other pruned block.
|
||||
if header.Number.Uint64() > 0 {
|
||||
// This should never happen. In practice, previously currentBlock
|
||||
// contained the entire block whereas now only a "marker", so there
|
||||
// is an ever so slight chance for a race we should handle.
|
||||
log.Error("Current block not found in database", "block", header.Number, "hash", header.Hash())
|
||||
return fmt.Errorf("current block missing: #%d [%x..]", header.Number, header.Hash().Bytes()[:4])
|
||||
}
|
||||
}
|
||||
bc.chainHeadFeed.Send(ChainHeadEvent{Header: header})
|
||||
return nil
|
||||
|
|
|
|||
|
|
@ -196,6 +196,19 @@ func (indexer *txIndexer) repair(head uint64) {
|
|||
}
|
||||
}
|
||||
|
||||
// resolveHead resolves the block number of the current chain head.
|
||||
func (indexer *txIndexer) resolveHead() uint64 {
|
||||
headBlockHash := rawdb.ReadHeadBlockHash(indexer.db)
|
||||
if headBlockHash == (common.Hash{}) {
|
||||
return 0
|
||||
}
|
||||
headBlockNumber := rawdb.ReadHeaderNumber(indexer.db, headBlockHash)
|
||||
if headBlockNumber == nil {
|
||||
return 0
|
||||
}
|
||||
return *headBlockNumber
|
||||
}
|
||||
|
||||
// loop is the scheduler of the indexer, assigning indexing/unindexing tasks depending
|
||||
// on the received chain event.
|
||||
func (indexer *txIndexer) loop(chain *BlockChain) {
|
||||
|
|
@ -203,9 +216,9 @@ func (indexer *txIndexer) loop(chain *BlockChain) {
|
|||
|
||||
// Listening to chain events and manipulate the transaction indexes.
|
||||
var (
|
||||
stop chan struct{} // Non-nil if background routine is active
|
||||
done chan struct{} // Non-nil if background routine is active
|
||||
head = rawdb.ReadHeadBlock(indexer.db).NumberU64() // The latest announced chain head
|
||||
stop chan struct{} // Non-nil if background routine is active
|
||||
done chan struct{} // Non-nil if background routine is active
|
||||
head = indexer.resolveHead() // The latest announced chain head
|
||||
|
||||
headCh = make(chan ChainHeadEvent)
|
||||
sub = chain.SubscribeChainHeadEvent(headCh)
|
||||
|
|
|
|||
|
|
@ -186,13 +186,15 @@ func (p *TxPool) loop(head *types.Header) {
|
|||
// Try to inject a busy marker and start a reset if successful
|
||||
select {
|
||||
case resetBusy <- struct{}{}:
|
||||
statedb, err := p.chain.StateAt(newHead.Root)
|
||||
if err != nil {
|
||||
log.Crit("Failed to reset txpool state", "err", err)
|
||||
// Updates the statedb with the new chain head. The head state may be
|
||||
// unavailable if the initial state sync has not yet completed.
|
||||
if statedb, err := p.chain.StateAt(newHead.Root); err != nil {
|
||||
log.Error("Failed to reset txpool state", "err", err)
|
||||
} else {
|
||||
p.stateLock.Lock()
|
||||
p.state = statedb
|
||||
p.stateLock.Unlock()
|
||||
}
|
||||
p.stateLock.Lock()
|
||||
p.state = statedb
|
||||
p.stateLock.Unlock()
|
||||
|
||||
// Busy marker injected, start a new subpool reset
|
||||
go func(oldHead, newHead *types.Header) {
|
||||
|
|
|
|||
Loading…
Reference in a new issue