triedb/pathdb: make batch with pre-allocated size (#32914)

This PR pre-allocates the database batch used for writing the history index data. It was observed that the database batch repeatedly grows while the mega-batch is being accumulated, causing significant memory allocation pressure. Pre-allocating the batch with an estimated size effectively mitigates this overhead.
2026-04-23 01:52:23 +00:00 · 2025-10-21 19:11:36 +08:00 · 2025-10-21 19:11:36 +08:00 · 0a8b820725
commit 0a8b820725
parent 79b6a56d3a
2 changed files with 26 additions and 5 deletions
--- a/triedb/pathdb/history_index_block.go
+++ b/triedb/pathdb/history_index_block.go
@ -25,10 +25,10 @@ import (
 )

 const (
-	indexBlockDescSize   = 14        // The size of index block descriptor
-	indexBlockEntriesCap = 4096      // The maximum number of entries can be grouped in a block
-	indexBlockRestartLen = 256       // The restart interval length of index block
-	historyIndexBatch    = 1_000_000 // The number of state history indexes for constructing or deleting as batch
+	indexBlockDescSize   = 14         // The size of index block descriptor
+	indexBlockEntriesCap = 4096       // The maximum number of entries can be grouped in a block
+	indexBlockRestartLen = 256        // The restart interval length of index block
+	historyIndexBatch    = 512 * 1024 // The number of state history indexes for constructing or deleting as batch
 )

 // indexBlockDesc represents a descriptor for an index block, which contains a
--- a/triedb/pathdb/history_indexer.go
+++ b/triedb/pathdb/history_indexer.go
@ -40,6 +40,11 @@ const (
 	stateHistoryIndexVersion    = stateHistoryIndexV0    // the current state index version
 	trienodeHistoryIndexV0      = uint8(0)               // initial version of trienode index structure
 	trienodeHistoryIndexVersion = trienodeHistoryIndexV0 // the current trienode index version
+
+	// estimations for calculating the batch size for atomic database commit
+	estimatedStateHistoryIndexSize    = 3  // The average size of each state history index entry is approximately 2-3 bytes
+	estimatedTrienodeHistoryIndexSize = 3  // The average size of each trienode history index entry is approximately 2-3 bytes
+	estimatedIndexBatchSizeFactor     = 32 // The factor accounts for the write amplification of each entry
 )

 // indexVersion returns the latest index version for the given history type.
@ -150,6 +155,22 @@ func (b *batchIndexer) process(h history, id uint64) error {
 	return b.finish(false)
 }

+// makeBatch constructs a database batch based on the number of pending entries.
+// The batch size is roughly estimated to minimize repeated resizing rounds,
+// as accurately predicting the exact size is technically challenging.
+func (b *batchIndexer) makeBatch() ethdb.Batch {
+	var size int
+	switch b.typ {
+	case typeStateHistory:
+		size = estimatedStateHistoryIndexSize
+	case typeTrienodeHistory:
+		size = estimatedTrienodeHistoryIndexSize
+	default:
+		panic(fmt.Sprintf("unknown history type %d", b.typ))
+	}
+	return b.db.NewBatchWithSize(size * estimatedIndexBatchSizeFactor * b.pending)
+}
+
 // finish writes the accumulated state indexes into the disk if either the
 // memory limitation is reached or it's requested forcibly.
 func (b *batchIndexer) finish(force bool) error {
@ -160,7 +181,7 @@ func (b *batchIndexer) finish(force bool) error {
 		return nil
 	}
 	var (
-		batch   = b.db.NewBatch()
+		batch   = b.makeBatch()
 		batchMu sync.RWMutex
 		start   = time.Now()
 		eg      errgroup.Group