triedb/pathdb: refactor state history write (#32497)

This pull request refactors the internal implementation in path database
a bit, specifically:

- purge the state index data in batch
- simplify the logic of state history construction and index, make it more readable
This commit is contained in:
rjl493456442 2025-08-26 21:53:55 +08:00 committed by GitHub
parent f877183cbb
commit 95ab643bb8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 89 additions and 62 deletions

View file

@ -318,13 +318,14 @@ func (db *Database) repairHistory() error {
log.Crit("Failed to retrieve head of state history", "err", err)
}
if frozen != 0 {
// TODO(rjl493456442) would be better to group them into a batch.
//
// Purge all state history indexing data first
rawdb.DeleteStateHistoryIndexMetadata(db.diskdb)
rawdb.DeleteStateHistoryIndex(db.diskdb)
err := db.stateFreezer.Reset()
if err != nil {
batch := db.diskdb.NewBatch()
rawdb.DeleteStateHistoryIndexMetadata(batch)
rawdb.DeleteStateHistoryIndex(batch)
if err := batch.Write(); err != nil {
log.Crit("Failed to purge state history index", "err", err)
}
if err := db.stateFreezer.Reset(); err != nil {
log.Crit("Failed to reset state histories", "err", err)
}
log.Info("Truncated extraneous state history")
@ -510,11 +511,13 @@ func (db *Database) Enable(root common.Hash) error {
// mappings can be huge and might take a while to clear
// them, just leave them in disk and wait for overwriting.
if db.stateFreezer != nil {
// TODO(rjl493456442) would be better to group them into a batch.
//
// Purge all state history indexing data first
rawdb.DeleteStateHistoryIndexMetadata(db.diskdb)
rawdb.DeleteStateHistoryIndex(db.diskdb)
batch.Reset()
rawdb.DeleteStateHistoryIndexMetadata(batch)
rawdb.DeleteStateHistoryIndex(batch)
if err := batch.Write(); err != nil {
return err
}
if err := db.stateFreezer.Reset(); err != nil {
return err
}

View file

@ -323,6 +323,69 @@ func (dl *diskLayer) update(root common.Hash, id uint64, block uint64, nodes *no
return newDiffLayer(dl, root, id, block, nodes, states)
}
// writeStateHistory stores the state history and indexes if indexing is
// permitted.
//
// What's more, this function also returns a flag indicating whether the
// buffer flushing is required, ensuring the persistent state ID is always
// greater than or equal to the first history ID.
func (dl *diskLayer) writeStateHistory(diff *diffLayer) (bool, error) {
// Short circuit if state history is not permitted
if dl.db.stateFreezer == nil {
return false, nil
}
// Bail out with an error if writing the state history fails.
// This can happen, for example, if the device is full.
err := writeStateHistory(dl.db.stateFreezer, diff)
if err != nil {
return false, err
}
// Notify the state history indexer for newly created history
if dl.db.stateIndexer != nil {
if err := dl.db.stateIndexer.extend(diff.stateID()); err != nil {
return false, err
}
}
// Determine if the persisted history object has exceeded the
// configured limitation.
limit := dl.db.config.StateHistory
if limit == 0 {
return false, nil
}
tail, err := dl.db.stateFreezer.Tail()
if err != nil {
return false, err
} // firstID = tail+1
// length = diff.stateID()-firstID+1 = diff.stateID()-tail
if diff.stateID()-tail <= limit {
return false, nil
}
newFirst := diff.stateID() - limit + 1 // the id of first history **after truncation**
// In a rare case where the ID of the first history object (after tail
// truncation) exceeds the persisted state ID, we must take corrective
// steps:
//
// - Skip tail truncation temporarily, avoid the scenario that associated
// history of persistent state is removed
//
// - Force a commit of the cached dirty states into persistent state
//
// These measures ensure the persisted state ID always remains greater
// than or equal to the first history ID.
if persistentID := rawdb.ReadPersistentStateID(dl.db.diskdb); persistentID < newFirst {
log.Debug("Skip tail truncation", "persistentID", persistentID, "tailID", tail+1, "headID", diff.stateID(), "limit", limit)
return true, nil
}
pruned, err := truncateFromTail(dl.db.diskdb, dl.db.stateFreezer, newFirst-1)
if err != nil {
return false, err
}
log.Debug("Pruned state history", "items", pruned, "tailid", newFirst)
return false, nil
}
// commit merges the given bottom-most diff layer into the node buffer
// and returns a newly constructed disk layer. Note the current disk
// layer must be tagged as stale first to prevent re-access.
@ -333,34 +396,9 @@ func (dl *diskLayer) commit(bottom *diffLayer, force bool) (*diskLayer, error) {
// Construct and store the state history first. If crash happens after storing
// the state history but without flushing the corresponding states(journal),
// the stored state history will be truncated from head in the next restart.
var (
overflow bool
oldest uint64
)
if dl.db.stateFreezer != nil {
// Bail out with an error if writing the state history fails.
// This can happen, for example, if the device is full.
err := writeStateHistory(dl.db.stateFreezer, bottom)
if err != nil {
return nil, err
}
// Determine if the persisted history object has exceeded the configured
// limitation, set the overflow as true if so.
tail, err := dl.db.stateFreezer.Tail()
if err != nil {
return nil, err
}
limit := dl.db.config.StateHistory
if limit != 0 && bottom.stateID()-tail > limit {
overflow = true
oldest = bottom.stateID() - limit + 1 // track the id of history **after truncation**
}
// Notify the state history indexer for newly created history
if dl.db.stateIndexer != nil {
if err := dl.db.stateIndexer.extend(bottom.stateID()); err != nil {
return nil, err
}
}
flush, err := dl.writeStateHistory(bottom)
if err != nil {
return nil, err
}
// Mark the diskLayer as stale before applying any mutations on top.
dl.stale = true
@ -373,21 +411,13 @@ func (dl *diskLayer) commit(bottom *diffLayer, force bool) (*diskLayer, error) {
}
rawdb.WriteStateID(dl.db.diskdb, bottom.rootHash(), bottom.stateID())
// In a unique scenario where the ID of the oldest history object (after tail
// truncation) surpasses the persisted state ID, we take the necessary action
// of forcibly committing the cached dirty states to ensure that the persisted
// state ID remains higher.
persistedID := rawdb.ReadPersistentStateID(dl.db.diskdb)
if !force && persistedID < oldest {
force = true
}
// Merge the trie nodes and flat states of the bottom-most diff layer into the
// buffer as the combined layer.
combined := dl.buffer.commit(bottom.nodes, bottom.states.stateSet)
// Terminate the background state snapshot generation before mutating the
// persistent state.
if combined.full() || force {
if combined.full() || force || flush {
// Wait until the previous frozen buffer is fully flushed
if dl.frozen != nil {
if err := dl.frozen.waitFlush(); err != nil {
@ -431,8 +461,8 @@ func (dl *diskLayer) commit(bottom *diffLayer, force bool) (*diskLayer, error) {
}
})
// Block until the frozen buffer is fully flushed out if the async flushing
// is not allowed, or if the oldest history surpasses the persisted state ID.
if dl.db.config.NoAsyncFlush || persistedID < oldest {
// is not allowed.
if dl.db.config.NoAsyncFlush {
if err := dl.frozen.waitFlush(); err != nil {
return nil, err
}
@ -445,15 +475,6 @@ func (dl *diskLayer) commit(bottom *diffLayer, force bool) (*diskLayer, error) {
if dl.generator != nil {
ndl.setGenerator(dl.generator)
}
// To remove outdated history objects from the end, we set the 'tail' parameter
// to 'oldest-1' due to the offset between the freezer index and the history ID.
if overflow {
pruned, err := truncateFromTail(ndl.db.diskdb, ndl.db.stateFreezer, oldest-1)
if err != nil {
return nil, err
}
log.Debug("Pruned state history", "items", pruned, "tailid", oldest)
}
return ndl, nil
}

View file

@ -598,14 +598,17 @@ func checkVersion(disk ethdb.KeyValueStore) {
if err == nil && m.Version == stateIndexVersion {
return
}
// TODO(rjl493456442) would be better to group them into a batch.
rawdb.DeleteStateHistoryIndexMetadata(disk)
rawdb.DeleteStateHistoryIndex(disk)
version := "unknown"
if err == nil {
version = fmt.Sprintf("%d", m.Version)
}
batch := disk.NewBatch()
rawdb.DeleteStateHistoryIndexMetadata(batch)
rawdb.DeleteStateHistoryIndex(batch)
if err := batch.Write(); err != nil {
log.Crit("Failed to purge state history index", "err", err)
}
log.Info("Cleaned up obsolete state history index", "version", version, "want", stateIndexVersion)
}