triedb/pathdb: make batch with pre-allocated size (#32914)

In this PR, the database batch for writing the history index data is
pre-allocated.

It's observed that database batch repeatedly grows the size of the
mega-batch,
causing significant memory allocation pressure. This approach can
effectively
mitigate the overhead.
This commit is contained in:
rjl493456442 2025-10-21 19:11:36 +08:00 committed by GitHub
parent 79b6a56d3a
commit 0a8b820725
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 26 additions and 5 deletions

View file

@ -25,10 +25,10 @@ import (
)
const (
indexBlockDescSize = 14 // The size of index block descriptor
indexBlockEntriesCap = 4096 // The maximum number of entries can be grouped in a block
indexBlockRestartLen = 256 // The restart interval length of index block
historyIndexBatch = 1_000_000 // The number of state history indexes for constructing or deleting as batch
indexBlockDescSize = 14 // The size of index block descriptor
indexBlockEntriesCap = 4096 // The maximum number of entries can be grouped in a block
indexBlockRestartLen = 256 // The restart interval length of index block
historyIndexBatch = 512 * 1024 // The number of state history indexes for constructing or deleting as batch
)
// indexBlockDesc represents a descriptor for an index block, which contains a

View file

@ -40,6 +40,11 @@ const (
stateHistoryIndexVersion = stateHistoryIndexV0 // the current state index version
trienodeHistoryIndexV0 = uint8(0) // initial version of trienode index structure
trienodeHistoryIndexVersion = trienodeHistoryIndexV0 // the current trienode index version
// estimations for calculating the batch size for atomic database commit
estimatedStateHistoryIndexSize = 3 // The average size of each state history index entry is approximately 23 bytes
estimatedTrienodeHistoryIndexSize = 3 // The average size of each trienode history index entry is approximately 2-3 bytes
estimatedIndexBatchSizeFactor = 32 // The factor counts for the write amplification for each entry
)
// indexVersion returns the latest index version for the given history type.
@ -150,6 +155,22 @@ func (b *batchIndexer) process(h history, id uint64) error {
return b.finish(false)
}
// makeBatch constructs a database batch based on the number of pending entries.
// The batch size is roughly estimated to minimize repeated resizing rounds,
// as accurately predicting the exact size is technically challenging.
func (b *batchIndexer) makeBatch() ethdb.Batch {
var size int
switch b.typ {
case typeStateHistory:
size = estimatedStateHistoryIndexSize
case typeTrienodeHistory:
size = estimatedTrienodeHistoryIndexSize
default:
panic(fmt.Sprintf("unknown history type %d", b.typ))
}
return b.db.NewBatchWithSize(size * estimatedIndexBatchSizeFactor * b.pending)
}
// finish writes the accumulated state indexes into the disk if either the
// memory limitation is reached or it's requested forcibly.
func (b *batchIndexer) finish(force bool) error {
@ -160,7 +181,7 @@ func (b *batchIndexer) finish(force bool) error {
return nil
}
var (
batch = b.db.NewBatch()
batch = b.makeBatch()
batchMu sync.RWMutex
start = time.Now()
eg errgroup.Group