mirror of
https://github.com/ethereum/go-ethereum.git
synced 2026-04-01 15:45:55 +00:00
beacon/light: keep retrying checkpoint init if failed (#33966)
This PR changes the blsync checkpoint init logic so that, even if initialization fails with a given server and an error log message is printed, the server returns to its initial state and is allowed to retry initialization after the failure delay period. The previous logic had an `ssDone` server state that left the server permanently unusable once checkpoint init had failed for an apparently permanent reason. This was not the correct behavior, because different servers behave differently when overloaded, and the response to a permanently missing item is sometimes not clearly distinguishable from an overload response. A safer approach is to never assume that a failure is permanent and to always allow a retry. The failure delay formula is also fixed: the delay is now properly capped at `maxFailureDelay`. The previous formula allowed the delay to grow without bound if a retry was attempted immediately after each delay period.
This commit is contained in:
parent
92b4cb2663
commit
fc43170cdd
2 changed files with 3 additions and 6 deletions
|
|
@ -438,14 +438,11 @@ func (s *serverWithLimits) fail(desc string) {
|
|||
// failLocked calculates the dynamic failure delay and applies it.
|
||||
func (s *serverWithLimits) failLocked(desc string) {
|
||||
log.Debug("Server error", "description", desc)
|
||||
s.failureDelay *= 2
|
||||
now := s.clock.Now()
|
||||
if now > s.failureDelayEnd {
|
||||
s.failureDelay *= math.Pow(2, -float64(now-s.failureDelayEnd)/float64(maxFailureDelay))
|
||||
}
|
||||
if s.failureDelay < float64(minFailureDelay) {
|
||||
s.failureDelay = float64(minFailureDelay)
|
||||
}
|
||||
s.failureDelay = max(min(s.failureDelay*2, float64(maxFailureDelay)), float64(minFailureDelay))
|
||||
s.failureDelayEnd = now + mclock.AbsTime(s.failureDelay)
|
||||
s.delay(time.Duration(s.failureDelay))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -62,7 +62,6 @@ const (
|
|||
ssNeedParent // cp header slot %32 != 0, need parent to check epoch boundary
|
||||
ssParentRequested // cp parent header requested
|
||||
ssPrintStatus // has all necessary info, print log message if init still not successful
|
||||
ssDone // log message printed, no more action required
|
||||
)
|
||||
|
||||
type serverState struct {
|
||||
|
|
@ -180,7 +179,8 @@ func (s *CheckpointInit) Process(requester request.Requester, events []request.E
|
|||
default:
|
||||
log.Error("blsync: checkpoint not available, but reported as finalized; specified checkpoint hash might be too old", "server", server.Name())
|
||||
}
|
||||
s.serverState[server] = serverState{state: ssDone}
|
||||
s.serverState[server] = serverState{state: ssDefault}
|
||||
requester.Fail(server, "checkpoint init failed")
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue