From 588dd94aadca36d8a55a44457ff31dd480073a97 Mon Sep 17 00:00:00 2001
From: rjl493456442 <garyrong0905@gmail.com>
Date: Sat, 17 Jan 2026 20:28:37 +0800
Subject: [PATCH] triedb/pathdb: implement trienode history indexing scheme
 (#33551)

This PR implements the indexing scheme for trie node history. Check
https://github.com/ethereum/go-ethereum/pull/33399 for more details
---
 cmd/keeper/go.mod                            |   1 +
 triedb/pathdb/database_test.go               |   4 +-
 triedb/pathdb/history.go                     |  79 +++-
 triedb/pathdb/history_indexer.go             |  34 +-
 triedb/pathdb/history_reader.go              |  75 +--
 triedb/pathdb/history_reader_test.go         |   4 +-
 triedb/pathdb/history_state.go               |   8 +-
 triedb/pathdb/history_trienode.go            |  32 +-
 triedb/pathdb/history_trienode_test.go       |  46 --
 triedb/pathdb/history_trienode_utils.go      | 238 ++++++++++
 triedb/pathdb/history_trienode_utils_test.go | 458 +++++++++++++++++++
 triedb/pathdb/reader.go                      |   4 +-
 12 files changed, 870 insertions(+), 113 deletions(-)

diff --git a/cmd/keeper/go.mod b/cmd/keeper/go.mod
index cee1ce05a7..21cdfe8c33 100644
--- a/cmd/keeper/go.mod
+++ b/cmd/keeper/go.mod
@@ -33,6 +33,7 @@ require (
 	github.com/tklauser/go-sysconf v0.3.12 // indirect
 	github.com/tklauser/numcpus v0.6.1 // indirect
 	golang.org/x/crypto v0.36.0 // indirect
+	golang.org/x/exp v0.0.0-20230626212559-97b1e661b5df // indirect
 	golang.org/x/sync v0.12.0 // indirect
 	golang.org/x/sys v0.39.0 // indirect
 	gopkg.in/yaml.v2 v2.4.0 // indirect
diff --git a/triedb/pathdb/database_test.go b/triedb/pathdb/database_test.go
index 8cca7b1b3c..2d1819d08f 100644
--- a/triedb/pathdb/database_test.go
+++ b/triedb/pathdb/database_test.go
@@ -950,7 +950,7 @@ func TestDatabaseIndexRecovery(t *testing.T) {
 	var (
 		dIndex int
 		roots  = env.roots
-		hr     = newHistoryReader(env.db.diskdb, env.db.stateFreezer)
+		hr     = newStateHistoryReader(env.db.diskdb, env.db.stateFreezer)
 	)
 	for i, root := range roots {
 		if root == dRoot {
@@ -1011,7 +1011,7 @@ func TestDatabaseIndexRecovery(t *testing.T) {
 
 	// Ensure the truncated state histories become accessible
 	bRoot = env.db.tree.bottom().rootHash()
-	hr = newHistoryReader(env.db.diskdb, env.db.stateFreezer)
+	hr = newStateHistoryReader(env.db.diskdb, env.db.stateFreezer)
 	for i, root := range roots {
 		if root == bRoot {
 			break
diff --git a/triedb/pathdb/history.go b/triedb/pathdb/history.go
index d78999f218..86224ea5b2 100644
--- a/triedb/pathdb/history.go
+++ b/triedb/pathdb/history.go
@@ -121,6 +121,20 @@ func (ident stateIdent) String() string {
 	return ident.addressHash.Hex() + ident.path
 }
 
+func (ident stateIdent) bloomSize() int {
+	if ident.typ == typeAccount {
+		return 0
+	}
+	if ident.typ == typeStorage {
+		return 0
+	}
+	scheme := accountIndexScheme
+	if ident.addressHash != (common.Hash{}) {
+		scheme = storageIndexScheme
+	}
+	return scheme.getBitmapSize(len(ident.path))
+}
+
 // newAccountIdent constructs a state identifier for an account.
 func newAccountIdent(addressHash common.Hash) stateIdent {
 	return stateIdent{
@@ -143,6 +157,8 @@ func newStorageIdent(addressHash common.Hash, storageHash common.Hash) stateIden
 // newTrienodeIdent constructs a state identifier for a trie node.
 // The address denotes the address hash of the associated account;
 // the path denotes the path of the node within the trie;
+//
+// nolint:unused
 func newTrienodeIdent(addressHash common.Hash, path string) stateIdent {
 	return stateIdent{
 		typ:         typeTrienode,
@@ -180,17 +196,62 @@ func newStorageIdentQuery(address common.Address, addressHash common.Hash, stora
 	}
 }
 
-// newTrienodeIdentQuery constructs a state identifier for a trie node.
-// the addressHash denotes the address hash of the associated account;
-// the path denotes the path of the node within the trie;
-//
-// nolint:unused
-func newTrienodeIdentQuery(addrHash common.Hash, path []byte) stateIdentQuery {
-	return stateIdentQuery{
-		stateIdent: newTrienodeIdent(addrHash, string(path)),
+// indexElem defines the element for indexing.
+type indexElem interface {
+	key() stateIdent
+	ext() []uint16
+}
+
+type accountIndexElem struct {
+	addressHash common.Hash
+}
+
+func (a accountIndexElem) key() stateIdent {
+	return stateIdent{
+		typ:         typeAccount,
+		addressHash: a.addressHash,
 	}
 }
 
+func (a accountIndexElem) ext() []uint16 {
+	return nil
+}
+
+type storageIndexElem struct {
+	addressHash common.Hash
+	storageHash common.Hash
+}
+
+func (a storageIndexElem) key() stateIdent {
+	return stateIdent{
+		typ:         typeStorage,
+		addressHash: a.addressHash,
+		storageHash: a.storageHash,
+	}
+}
+
+func (a storageIndexElem) ext() []uint16 {
+	return nil
+}
+
+type trienodeIndexElem struct {
+	owner common.Hash
+	path  string
+	data  []uint16
+}
+
+func (a trienodeIndexElem) key() stateIdent {
+	return stateIdent{
+		typ:         typeTrienode,
+		addressHash: a.owner,
+		path:        a.path,
+	}
+}
+
+func (a trienodeIndexElem) ext() []uint16 {
+	return a.data
+}
+
 // history defines the interface of historical data, shared by stateHistory
 // and trienodeHistory.
 type history interface {
@@ -198,7 +259,7 @@ type history interface {
 	typ() historyType
 
 	// forEach returns an iterator to traverse the state entries in the history.
-	forEach() iter.Seq[stateIdent]
+	forEach() iter.Seq[indexElem]
 }
 
 var (
diff --git a/triedb/pathdb/history_indexer.go b/triedb/pathdb/history_indexer.go
index ddb4a293cc..18d71f6dae 100644
--- a/triedb/pathdb/history_indexer.go
+++ b/triedb/pathdb/history_indexer.go
@@ -29,6 +29,7 @@ import (
 	"github.com/ethereum/go-ethereum/ethdb"
 	"github.com/ethereum/go-ethereum/log"
 	"github.com/ethereum/go-ethereum/rlp"
+	"golang.org/x/exp/maps"
 	"golang.org/x/sync/errgroup"
 )
 
@@ -121,18 +122,20 @@ func deleteIndexMetadata(db ethdb.KeyValueWriter, typ historyType) {
 // batchIndexer is responsible for performing batch indexing or unindexing
 // of historical data (e.g., state or trie node changes) atomically.
 type batchIndexer struct {
-	index   map[stateIdent][]uint64 // List of history IDs for tracked state entry
-	pending int                     // Number of entries processed in the current batch.
-	delete  bool                    // Operation mode: true for unindex, false for index.
-	lastID  uint64                  // ID of the most recently processed history.
-	typ     historyType             // Type of history being processed (e.g., state or trienode).
-	db      ethdb.KeyValueStore     // Key-value database used to store or delete index data.
+	index   map[stateIdent][]uint64   // List of history IDs for tracked state entry
+	ext     map[stateIdent][][]uint16 // List of extension for each state element
+	pending int                       // Number of entries processed in the current batch.
+	delete  bool                      // Operation mode: true for unindex, false for index.
+	lastID  uint64                    // ID of the most recently processed history.
+	typ     historyType               // Type of history being processed (e.g., state or trienode).
+	db      ethdb.KeyValueStore       // Key-value database used to store or delete index data.
 }
 
 // newBatchIndexer constructs the batch indexer with the supplied mode.
 func newBatchIndexer(db ethdb.KeyValueStore, delete bool, typ historyType) *batchIndexer {
 	return &batchIndexer{
 		index:  make(map[stateIdent][]uint64),
+		ext:    make(map[stateIdent][][]uint16),
 		delete: delete,
 		typ:    typ,
 		db:     db,
@@ -142,8 +145,10 @@ func newBatchIndexer(db ethdb.KeyValueStore, delete bool, typ historyType) *batc
 // process traverses the state entries within the provided history and tracks the mutation
 // records for them.
 func (b *batchIndexer) process(h history, id uint64) error {
-	for ident := range h.forEach() {
-		b.index[ident] = append(b.index[ident], id)
+	for elem := range h.forEach() {
+		key := elem.key()
+		b.index[key] = append(b.index[key], id)
+		b.ext[key] = append(b.ext[key], elem.ext())
 		b.pending++
 	}
 	b.lastID = id
@@ -190,14 +195,15 @@ func (b *batchIndexer) finish(force bool) error {
 		indexed = metadata.Last
 	}
 	for ident, list := range b.index {
+		ext := b.ext[ident]
 		eg.Go(func() error {
 			if !b.delete {
-				iw, err := newIndexWriter(b.db, ident, indexed, 0)
+				iw, err := newIndexWriter(b.db, ident, indexed, ident.bloomSize())
 				if err != nil {
 					return err
 				}
-				for _, n := range list {
-					if err := iw.append(n, nil); err != nil {
+				for i, n := range list {
+					if err := iw.append(n, ext[i]); err != nil {
 						return err
 					}
 				}
@@ -205,7 +211,7 @@ func (b *batchIndexer) finish(force bool) error {
 					iw.finish(batch)
 				})
 			} else {
-				id, err := newIndexDeleter(b.db, ident, indexed, 0)
+				id, err := newIndexDeleter(b.db, ident, indexed, ident.bloomSize())
 				if err != nil {
 					return err
 				}
@@ -239,8 +245,10 @@ func (b *batchIndexer) finish(force bool) error {
 		return err
 	}
 	log.Debug("Committed batch indexer", "type", b.typ, "entries", len(b.index), "records", b.pending, "size", common.StorageSize(batchSize), "elapsed", common.PrettyDuration(time.Since(start)))
+
 	b.pending = 0
-	b.index = make(map[stateIdent][]uint64)
+	maps.Clear(b.index)
+	maps.Clear(b.ext)
 	return nil
 }
 
diff --git a/triedb/pathdb/history_reader.go b/triedb/pathdb/history_reader.go
index 69e7d5bd22..04cd869d2b 100644
--- a/triedb/pathdb/history_reader.go
+++ b/triedb/pathdb/history_reader.go
@@ -99,16 +99,17 @@ func (r *indexReaderWithLimitTag) readGreaterThan(id uint64, lastID uint64) (uin
 	return r.reader.readGreaterThan(id)
 }
 
-// historyReader is the structure to access historic state data.
-type historyReader struct {
+// stateHistoryReader is the structure to access historic state data.
+type stateHistoryReader struct {
 	disk    ethdb.KeyValueReader
 	freezer ethdb.AncientReader
 	readers map[string]*indexReaderWithLimitTag
 }
 
-// newHistoryReader constructs the history reader with the supplied db.
-func newHistoryReader(disk ethdb.KeyValueReader, freezer ethdb.AncientReader) *historyReader {
-	return &historyReader{
+// newStateHistoryReader constructs the history reader with the supplied db
+// for accessing historical states.
+func newStateHistoryReader(disk ethdb.KeyValueReader, freezer ethdb.AncientReader) *stateHistoryReader {
+	return &stateHistoryReader{
 		disk:    disk,
 		freezer: freezer,
 		readers: make(map[string]*indexReaderWithLimitTag),
@@ -117,7 +118,7 @@ func newHistoryReader(disk ethdb.KeyValueReader, freezer ethdb.AncientReader) *h
 
 // readAccountMetadata resolves the account metadata within the specified
 // state history.
-func (r *historyReader) readAccountMetadata(address common.Address, historyID uint64) ([]byte, error) {
+func (r *stateHistoryReader) readAccountMetadata(address common.Address, historyID uint64) ([]byte, error) {
 	blob := rawdb.ReadStateAccountIndex(r.freezer, historyID)
 	if len(blob) == 0 {
 		return nil, fmt.Errorf("account index is truncated, historyID: %d", historyID)
@@ -143,7 +144,7 @@ func (r *historyReader) readAccountMetadata(address common.Address, historyID ui
 
 // readStorageMetadata resolves the storage slot metadata within the specified
 // state history.
-func (r *historyReader) readStorageMetadata(storageKey common.Hash, storageHash common.Hash, historyID uint64, slotOffset, slotNumber int) ([]byte, error) {
+func (r *stateHistoryReader) readStorageMetadata(storageKey common.Hash, storageHash common.Hash, historyID uint64, slotOffset, slotNumber int) ([]byte, error) {
 	data, err := rawdb.ReadStateStorageIndex(r.freezer, historyID, slotIndexSize*slotOffset, slotIndexSize*slotNumber)
 	if err != nil {
 		msg := fmt.Sprintf("id: %d, slot-offset: %d, slot-length: %d", historyID, slotOffset, slotNumber)
@@ -178,7 +179,7 @@ func (r *historyReader) readStorageMetadata(storageKey common.Hash, storageHash
 }
 
 // readAccount retrieves the account data from the specified state history.
-func (r *historyReader) readAccount(address common.Address, historyID uint64) ([]byte, error) {
+func (r *stateHistoryReader) readAccount(address common.Address, historyID uint64) ([]byte, error) {
 	metadata, err := r.readAccountMetadata(address, historyID)
 	if err != nil {
 		return nil, err
@@ -194,7 +195,7 @@ func (r *historyReader) readAccount(address common.Address, historyID uint64) ([
 }
 
 // readStorage retrieves the storage slot data from the specified state history.
-func (r *historyReader) readStorage(address common.Address, storageKey common.Hash, storageHash common.Hash, historyID uint64) ([]byte, error) {
+func (r *stateHistoryReader) readStorage(address common.Address, storageKey common.Hash, storageHash common.Hash, historyID uint64) ([]byte, error) {
 	metadata, err := r.readAccountMetadata(address, historyID)
 	if err != nil {
 		return nil, err
@@ -224,35 +225,16 @@ func (r *historyReader) readStorage(address common.Address, storageKey common.Ha
 // stateID: represents the ID of the state of the specified version;
 // lastID: represents the ID of the latest/newest state history;
 // latestValue: represents the state value at the current disk layer with ID == lastID;
-func (r *historyReader) read(state stateIdentQuery, stateID uint64, lastID uint64, latestValue []byte) ([]byte, error) {
-	tail, err := r.freezer.Tail()
+func (r *stateHistoryReader) read(state stateIdentQuery, stateID uint64, lastID uint64, latestValue []byte) ([]byte, error) {
+	lastIndexed, err := checkStateAvail(state.stateIdent, typeStateHistory, r.freezer, stateID, lastID, r.disk)
 	if err != nil {
 		return nil, err
-	} // firstID = tail+1
-
-	// stateID+1 == firstID is allowed, as all the subsequent state histories
-	// are present with no gap inside.
-	if stateID < tail {
-		return nil, fmt.Errorf("historical state has been pruned, first: %d, state: %d", tail+1, stateID)
 	}
-
-	// To serve the request, all state histories from stateID+1 to lastID
-	// must be indexed. It's not supposed to happen unless system is very
-	// wrong.
-	metadata := loadIndexMetadata(r.disk, toHistoryType(state.typ))
-	if metadata == nil || metadata.Last < lastID {
-		indexed := "null"
-		if metadata != nil {
-			indexed = fmt.Sprintf("%d", metadata.Last)
-		}
-		return nil, fmt.Errorf("state history is not fully indexed, requested: %d, indexed: %s", stateID, indexed)
-	}
-
 	// Construct the index reader to locate the corresponding history for
 	// state retrieval
 	ir, ok := r.readers[state.String()]
 	if !ok {
-		ir, err = newIndexReaderWithLimitTag(r.disk, state.stateIdent, metadata.Last, 0)
+		ir, err = newIndexReaderWithLimitTag(r.disk, state.stateIdent, lastIndexed, 0)
 		if err != nil {
 			return nil, err
 		}
@@ -277,3 +259,34 @@ func (r *historyReader) read(state stateIdentQuery, stateID uint64, lastID uint6
 	}
 	return r.readStorage(state.address, state.storageKey, state.storageHash, historyID)
 }
+
+// checkStateAvail determines whether the requested historical state is available
+// for access. On success, it also returns the ID of the latest indexed history
+// entry for subsequent use.
+func checkStateAvail(state stateIdent, exptyp historyType, freezer ethdb.AncientReader, stateID uint64, lastID uint64, db ethdb.KeyValueReader) (uint64, error) {
+	if toHistoryType(state.typ) != exptyp {
+		return 0, fmt.Errorf("unsupported history type: %d, want: %v", toHistoryType(state.typ), exptyp)
+	}
+	// firstID = tail+1
+	tail, err := freezer.Tail()
+	if err != nil {
+		return 0, err
+	}
+	// stateID+1 == firstID is allowed, as all the subsequent history entries
+	// are present with no gap inside.
+	if stateID < tail {
+		return 0, fmt.Errorf("historical state has been pruned, first: %d, state: %d", tail+1, stateID)
+	}
+	// To serve the request, all history entries from stateID+1 to lastID
+	// must be indexed. It's not supposed to happen unless system is very
+	// wrong.
+	metadata := loadIndexMetadata(db, exptyp)
+	if metadata == nil || metadata.Last < lastID {
+		indexed := "null"
+		if metadata != nil {
+			indexed = fmt.Sprintf("%d", metadata.Last)
+		}
+		return 0, fmt.Errorf("history is not fully indexed, requested: %d, indexed: %s", stateID, indexed)
+	}
+	return metadata.Last, nil
+}
diff --git a/triedb/pathdb/history_reader_test.go b/triedb/pathdb/history_reader_test.go
index 3e1a545ff3..b69fba68cb 100644
--- a/triedb/pathdb/history_reader_test.go
+++ b/triedb/pathdb/history_reader_test.go
@@ -50,7 +50,7 @@ func stateAvail(id uint64, env *tester) bool {
 	return id+1 >= firstID
 }
 
-func checkHistoricalState(env *tester, root common.Hash, id uint64, hr *historyReader) error {
+func checkHistoricalState(env *tester, root common.Hash, id uint64, hr *stateHistoryReader) error {
 	if !stateAvail(id, env) {
 		return nil
 	}
@@ -157,7 +157,7 @@ func testHistoryReader(t *testing.T, historyLimit uint64) {
 	var (
 		roots = env.roots
 		dl    = env.db.tree.bottom()
-		hr    = newHistoryReader(env.db.diskdb, env.db.stateFreezer)
+		hr    = newStateHistoryReader(env.db.diskdb, env.db.stateFreezer)
 	)
 	for i, root := range roots {
 		if root == dl.rootHash() {
diff --git a/triedb/pathdb/history_state.go b/triedb/pathdb/history_state.go
index bc21915dba..23428b1a54 100644
--- a/triedb/pathdb/history_state.go
+++ b/triedb/pathdb/history_state.go
@@ -283,11 +283,11 @@ func (h *stateHistory) typ() historyType {
 
 // forEach implements the history interface, returning an iterator to traverse the
 // state entries in the history.
-func (h *stateHistory) forEach() iter.Seq[stateIdent] {
-	return func(yield func(stateIdent) bool) {
+func (h *stateHistory) forEach() iter.Seq[indexElem] {
+	return func(yield func(indexElem) bool) {
 		for _, addr := range h.accountList {
 			addrHash := crypto.Keccak256Hash(addr.Bytes())
-			if !yield(newAccountIdent(addrHash)) {
+			if !yield(accountIndexElem{addrHash}) {
 				return
 			}
 			for _, slotKey := range h.storageList[addr] {
@@ -298,7 +298,7 @@ func (h *stateHistory) forEach() iter.Seq[stateIdent] {
 				if h.meta.version != stateHistoryV0 {
 					slotHash = crypto.Keccak256Hash(slotKey.Bytes())
 				}
-				if !yield(newStorageIdent(addrHash, slotHash)) {
+				if !yield(storageIndexElem{addrHash, slotHash}) {
 					return
 				}
 			}
diff --git a/triedb/pathdb/history_trienode.go b/triedb/pathdb/history_trienode.go
index 6c0c0fe8cc..67be9de491 100644
--- a/triedb/pathdb/history_trienode.go
+++ b/triedb/pathdb/history_trienode.go
@@ -166,11 +166,35 @@ func (h *trienodeHistory) typ() historyType {
 
 // forEach implements the history interface, returning an iterator to traverse the
 // state entries in the history.
-func (h *trienodeHistory) forEach() iter.Seq[stateIdent] {
-	return func(yield func(stateIdent) bool) {
+func (h *trienodeHistory) forEach() iter.Seq[indexElem] {
+	return func(yield func(indexElem) bool) {
 		for _, owner := range h.owners {
-			for _, path := range h.nodeList[owner] {
-				if !yield(newTrienodeIdent(owner, path)) {
+			var (
+				scheme  *indexScheme
+				paths   = h.nodeList[owner]
+				indexes = make(map[string]map[uint16]struct{})
+			)
+			if owner == (common.Hash{}) {
+				scheme = accountIndexScheme
+			} else {
+				scheme = storageIndexScheme
+			}
+			for _, leaf := range findLeafPaths(paths) {
+				chunks, ids := scheme.splitPath(leaf)
+				for i := 0; i < len(chunks); i++ {
+					if _, exists := indexes[chunks[i]]; !exists {
+						indexes[chunks[i]] = make(map[uint16]struct{})
+					}
+					indexes[chunks[i]][ids[i]] = struct{}{}
+				}
+			}
+			for chunk, ids := range indexes {
+				elem := trienodeIndexElem{
+					owner: owner,
+					path:  chunk,
+					data:  slices.Collect(maps.Keys(ids)),
+				}
+				if !yield(elem) {
 					return
 				}
 			}
diff --git a/triedb/pathdb/history_trienode_test.go b/triedb/pathdb/history_trienode_test.go
index 0c0422f00f..8f9b9c2600 100644
--- a/triedb/pathdb/history_trienode_test.go
+++ b/triedb/pathdb/history_trienode_test.go
@@ -534,52 +534,6 @@ func TestTrienodeHistoryReaderNilKey(t *testing.T) {
 	}
 }
 
-// TestTrienodeHistoryReaderIterator tests the iterator functionality
-func TestTrienodeHistoryReaderIterator(t *testing.T) {
-	h := makeTrienodeHistory()
-
-	// Count expected entries
-	expectedCount := 0
-	expectedNodes := make(map[stateIdent]bool)
-	for owner, nodeList := range h.nodeList {
-		expectedCount += len(nodeList)
-		for _, node := range nodeList {
-			expectedNodes[stateIdent{
-				typ:         typeTrienode,
-				addressHash: owner,
-				path:        node,
-			}] = true
-		}
-	}
-
-	// Test the iterator
-	actualCount := 0
-	for x := range h.forEach() {
-		_ = x
-		actualCount++
-	}
-	if actualCount != expectedCount {
-		t.Fatalf("Iterator count mismatch: expected %d, got %d", expectedCount, actualCount)
-	}
-
-	// Test that iterator yields expected state identifiers
-	seen := make(map[stateIdent]bool)
-	for ident := range h.forEach() {
-		if ident.typ != typeTrienode {
-			t.Fatal("Iterator should only yield trienode history identifiers")
-		}
-		key := stateIdent{typ: ident.typ, addressHash: ident.addressHash, path: ident.path}
-		if seen[key] {
-			t.Fatal("Iterator yielded duplicate identifier")
-		}
-		seen[key] = true
-
-		if !expectedNodes[key] {
-			t.Fatalf("Unexpected yielded identifier %v", key)
-		}
-	}
-}
-
 // TestCommonPrefixLen tests the commonPrefixLen helper function
 func TestCommonPrefixLen(t *testing.T) {
 	tests := []struct {
diff --git a/triedb/pathdb/history_trienode_utils.go b/triedb/pathdb/history_trienode_utils.go
index 241b8a7d3c..11107494bb 100644
--- a/triedb/pathdb/history_trienode_utils.go
+++ b/triedb/pathdb/history_trienode_utils.go
@@ -21,6 +21,7 @@ import (
 	"fmt"
 	"math/bits"
 	"slices"
+	"strings"
 )
 
 // commonPrefixLen returns the length of the common prefix shared by a and b.
@@ -34,6 +35,243 @@ func commonPrefixLen(a, b []byte) int {
 	return n
 }
 
+// findLeafPaths scans a lexicographically sorted list of paths and returns
+// the subset of paths that represent leaves.
+//
+// A path is considered a leaf if:
+//   - it is the last element in the list, or
+//   - the next path does not have the current path as its prefix.
+//
+// In other words, a leaf is a path that has no children extending it.
+//
+// Example:
+//
+//	Input:  ["a", "ab", "abc", "b", "ba"]
+//	Output: ["abc", "ba"]
+//
+// The input must be sorted; otherwise the result is undefined.
+func findLeafPaths(paths []string) []string {
+	var leaves []string
+	for i := 0; i < len(paths); i++ {
+		if i == len(paths)-1 || !strings.HasPrefix(paths[i+1], paths[i]) {
+			leaves = append(leaves, paths[i])
+		}
+	}
+	return leaves
+}
+
+// hexPathNodeID computes a numeric node ID from the given path. The path is
+// interpreted as a sequence of base-16 digits, where each byte of the input
+// is treated as one hexadecimal digit in a big-endian number.
+//
+// The resulting node ID is constructed as:
+//
+//	ID = 1 + 16 + 16^2 + ... + 16^(n-1) + value
+//
+// where n is the number of bytes in the path, and `value` is the base-16
+// interpretation of the byte sequence.
+//
+// The offset (1 + 16 + 16^2 + ... + 16^(n-1)) ensures that all IDs of shorter
+// paths occupy a lower numeric range, so IDs are ordered by path length first
+// and by value among paths of the same length.
+//
+// The node ID is represented as a uint16, on the assumption that the path is
+// at most 3 nibbles long; longer paths may overflow it.
+func hexPathNodeID(path string) uint16 {
+	var (
+		offset = uint16(0)
+		pow    = uint16(1)
+		value  = uint16(0)
+		bytes  = []byte(path)
+	)
+	for i := 0; i < len(bytes); i++ {
+		offset += pow
+		pow *= 16
+	}
+	for i := 0; i < len(bytes); i++ {
+		value = value*16 + uint16(bytes[i])
+	}
+	return offset + value
+}
+
+// bitmapSize computes the number of bytes required for the marker bitmap
+// corresponding to the remaining portion of a path after a cut point.
+// The marker is a bitmap where each bit represents the presence of a
+// possible element in the remaining path segment.
+func bitmapSize(levels int) int {
+	// Compute: bits = 1 + 16 + 16^2 + ... + 16^(levels-1)
+	var (
+		bits = 0
+		pow  = 1
+	)
+	for i := 0; i < levels; i++ {
+		bits += pow
+		pow *= 16
+	}
+	// A small adjustment is applied to exclude the root element of this path
+	// segment, since any existing element would already imply the mutation of
+	// the root element. This trick can save us 1 byte for each bitmap which is
+	// non-trivial.
+	bits -= 1
+	return bits / 8
+}
+
+// indexScheme defines how trie nodes are grouped into chunks so that they
+// can be indexed at chunk level.
+//
+// skipRoot indicates whether the root node should be excluded from indexing.
+// cutPoints specifies the key length of chunks (in nibbles) extracted from
+// each path.
+type indexScheme struct {
+	// skipRoot indicates whether the root node should be excluded from indexing.
+	// In the account trie, the root is mutated on every state transition, so
+	// indexing it provides no value.
+	skipRoot bool
+
+	// cutPoints defines the key lengths of chunks at different positions.
+	// A single trie node path may span multiple chunks vertically.
+	cutPoints []int
+
+	// bitmaps specifies the required bitmap size for each chunk. The key is the
+	// chunk key length, and the value is the corresponding bitmap size.
+	bitmaps map[int]int
+}
+
+var (
+	// Account trie is split into chunks like this:
+	//
+	// - root node is excluded from indexing
+	// - nodes at level1 to level2 are grouped as 16 chunks
+	// - all other nodes are grouped 3 levels per chunk
+	//
+	// Level1             [0]  ...  [f]               16 chunks
+	// Level3        [000]     ...     [fff]          4096 chunks
+	// Level6   [000000]       ...       [ffffff]     16777216 chunks
+	//
+	// For the chunks at level1, there are 17 nodes per chunk.
+	//
+	// chunk-level 0            [ 0 ]                        1 node
+	// chunk-level 1        [ 1 ] … [ 16 ]                  16 nodes
+	//
+	// For the non-level1 chunks, there are 273 nodes per chunk,
+	// regardless of the chunk's depth in the trie.
+	//
+	// chunk-level 0            [ 0 ]                        1 node
+	// chunk-level 1        [ 1 ] … [ 16 ]                  16 nodes
+	// chunk-level 2     [ 17 ] … … [ 272 ]                256 nodes
+	accountIndexScheme = newIndexScheme(true)
+
+	// Storage trie is split into chunks like this: (3 levels per chunk)
+	//
+	// Level0              [ ROOT ]                      1 chunk
+	// Level3        [000]   ...   [fff]              4096 chunks
+	// Level6   [000000]    ...      [ffffff]     16777216 chunks
+	//
+	// Within each chunk, there are 273 nodes in total, regardless of
+	// the chunk's depth in the trie.
+	//
+	// chunk-level 0            [ 0 ]                        1 node
+	// chunk-level 1        [ 1 ] … [ 16 ]                  16 nodes
+	// chunk-level 2     [ 17 ] … … [ 272 ]                256 nodes
+	storageIndexScheme = newIndexScheme(false)
+)
+
+// newIndexScheme initializes the index scheme.
+func newIndexScheme(skipRoot bool) *indexScheme {
+	var (
+		cuts    []int
+		bitmaps = make(map[int]int)
+	)
+	for v := 0; v <= 64; v += 3 {
+		var (
+			levels int
+			length int
+		)
+		if v == 0 && skipRoot {
+			length = 1
+			levels = 2
+		} else {
+			length = v
+			levels = 3
+		}
+		cuts = append(cuts, length)
+		bitmaps[length] = bitmapSize(levels)
+	}
+	return &indexScheme{
+		skipRoot:  skipRoot,
+		cutPoints: cuts,
+		bitmaps:   bitmaps,
+	}
+}
+
+// getBitmapSize returns the bitmap size in bytes for a chunk key of length pathLen.
+func (s *indexScheme) getBitmapSize(pathLen int) int {
+	return s.bitmaps[pathLen]
+}
+
+// chunkSpan returns the number of chunks spanned by a path of the given length.
+func (s *indexScheme) chunkSpan(length int) int {
+	var n int
+	for _, cut := range s.cutPoints {
+		if length >= cut {
+			n++
+			continue
+		}
+	}
+	return n
+}
+
+// splitPath applies the indexScheme to the given path and returns two lists:
+//
+// - chunkIDs: the progressively longer path prefixes cut out by the scheme
+// - nodeIDs: the computed node ID for the path segment following each cut
+//
+// The scheme defines a set of cut points that partition the path. For each cut:
+//
+// - chunkIDs[i] is path[:cutPoints[i]]
+// - nodeIDs[i] is the node ID of path[cutPoints[i]:nextCut-1] (to the path end for the last cut)
+func (s *indexScheme) splitPath(path string) ([]string, []uint16) {
+	// Special case: the root node of the account trie is mutated in every
+	// state transition, so its mutation records can be ignored.
+	n := len(path)
+	if n == 0 && s.skipRoot {
+		return nil, nil
+	}
+	var (
+		// Determine how many chunks are spanned by the path
+		chunks   = s.chunkSpan(n)
+		chunkIDs = make([]string, 0, chunks)
+		nodeIDs  = make([]uint16, 0, chunks)
+	)
+	for i := 0; i < chunks; i++ {
+		position := s.cutPoints[i]
+		chunkIDs = append(chunkIDs, path[:position])
+
+		var limit int
+		if i != chunks-1 {
+			limit = s.cutPoints[i+1] - 1
+		} else {
+			limit = len(path)
+		}
+		nodeIDs = append(nodeIDs, hexPathNodeID(path[position:limit]))
+	}
+	return chunkIDs, nodeIDs
+}
+
+// splitPathLast returns the path prefix of the deepest chunk spanned by the
+// given path, along with its corresponding internal node ID. If the path
+// spans no chunks, it returns an empty prefix and 0.
+//
+// nolint:unused
+func (s *indexScheme) splitPathLast(path string) (string, uint16) {
+	chunkIDs, nodeIDs := s.splitPath(path)
+	if len(chunkIDs) == 0 {
+		return "", 0
+	}
+	n := len(chunkIDs)
+	return chunkIDs[n-1], nodeIDs[n-1]
+}
+
 // encodeIDs sorts the given list of uint16 IDs and encodes them into a
 // compact byte slice using variable-length unsigned integer encoding.
 func encodeIDs(ids []uint16) []byte {
diff --git a/triedb/pathdb/history_trienode_utils_test.go b/triedb/pathdb/history_trienode_utils_test.go
index c3bd0d5b1f..32bd91166d 100644
--- a/triedb/pathdb/history_trienode_utils_test.go
+++ b/triedb/pathdb/history_trienode_utils_test.go
@@ -22,6 +22,464 @@ import (
 	"testing"
 )
 
+func TestHexPathNodeID(t *testing.T) {
+	t.Parallel()
+
+	var suites = []struct {
+		input string
+		exp   uint16
+	}{
+		{
+			input: "",
+			exp:   0,
+		},
+		{
+			input: string([]byte{0x0}),
+			exp:   1,
+		},
+		{
+			input: string([]byte{0xf}),
+			exp:   16,
+		},
+		{
+			input: string([]byte{0x0, 0x0}),
+			exp:   17,
+		},
+		{
+			input: string([]byte{0x0, 0xf}),
+			exp:   32,
+		},
+		{
+			input: string([]byte{0x1, 0x0}),
+			exp:   33,
+		},
+		{
+			input: string([]byte{0x1, 0xf}),
+			exp:   48,
+		},
+		{
+			input: string([]byte{0xf, 0xf}),
+			exp:   272,
+		},
+		{
+			input: string([]byte{0xf, 0xf, 0xf}),
+			exp:   4368,
+		},
+	}
+	for _, suite := range suites {
+		got := hexPathNodeID(suite.input)
+		if got != suite.exp {
+			t.Fatalf("Unexpected node ID for %v: got %d, want %d", suite.input, got, suite.exp)
+		}
+	}
+}
+
+func TestFindLeafPaths(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		input  []string
+		expect []string
+	}{
+		{
+			input:  nil,
+			expect: nil,
+		},
+		{
+			input:  []string{"a"},
+			expect: []string{"a"},
+		},
+		{
+			input: []string{"", "0", "00", "01", "1"},
+			expect: []string{
+				"00",
+				"01",
+				"1",
+			},
+		},
+		{
+			input: []string{"10", "100", "11", "2"},
+			expect: []string{
+				"100",
+				"11",
+				"2",
+			},
+		},
+		{
+			input: []string{"10", "100000000", "11", "111111111", "2"},
+			expect: []string{
+				"100000000",
+				"111111111",
+				"2",
+			},
+		},
+	}
+	for _, test := range tests {
+		res := findLeafPaths(test.input)
+		if !reflect.DeepEqual(res, test.expect) {
+			t.Fatalf("Unexpected result: %v, expected %v", res, test.expect)
+		}
+	}
+}
+
+func TestSplitAccountPath(t *testing.T) {
+	t.Parallel()
+
+	var suites = []struct {
+		input     string
+		expPrefix []string
+		expID     []uint16
+	}{
+		// Length = 0
+		{
+			"", nil, nil,
+		},
+		// Length = 1
+		{
+			string([]byte{0x0}),
+			[]string{
+				string([]byte{0x0}),
+			},
+			[]uint16{
+				0,
+			},
+		},
+		{
+			string([]byte{0x1}),
+			[]string{
+				string([]byte{0x1}),
+			},
+			[]uint16{
+				0,
+			},
+		},
+		{
+			string([]byte{0xf}),
+			[]string{
+				string([]byte{0xf}),
+			},
+			[]uint16{
+				0,
+			},
+		},
+		// Length = 2
+		{
+			string([]byte{0x0, 0x0}),
+			[]string{
+				string([]byte{0x0}),
+			},
+			[]uint16{
+				1,
+			},
+		},
+		{
+			string([]byte{0x0, 0x1}),
+			[]string{
+				string([]byte{0x0}),
+			},
+			[]uint16{
+				2,
+			},
+		},
+		{
+			string([]byte{0x0, 0xf}),
+			[]string{
+				string([]byte{0x0}),
+			},
+			[]uint16{
+				16,
+			},
+		},
+		{
+			string([]byte{0xf, 0xf}),
+			[]string{
+				string([]byte{0xf}),
+			},
+			[]uint16{
+				16,
+			},
+		},
+		// Length = 3
+		{
+			string([]byte{0x0, 0x0, 0x0}),
+			[]string{
+				string([]byte{0x0}),
+				string([]byte{0x0, 0x0, 0x0}),
+			},
+			[]uint16{
+				1, 0,
+			},
+		},
+		// Length = 3
+		{
+			string([]byte{0xf, 0xf, 0xf}),
+			[]string{
+				string([]byte{0xf}),
+				string([]byte{0xf, 0xf, 0xf}),
+			},
+			[]uint16{
+				16, 0,
+			},
+		},
+		// Length = 4
+		{
+			string([]byte{0x0, 0x0, 0x0, 0x0}),
+			[]string{
+				string([]byte{0x0}),
+				string([]byte{0x0, 0x0, 0x0}),
+			},
+			[]uint16{
+				1, 1,
+			},
+		},
+		{
+			string([]byte{0xf, 0xf, 0xf, 0xf}),
+			[]string{
+				string([]byte{0xf}),
+				string([]byte{0xf, 0xf, 0xf}),
+			},
+			[]uint16{
+				16, 16,
+			},
+		},
+		// Length = 5
+		{
+			string([]byte{0x0, 0x0, 0x0, 0x0, 0x0}),
+			[]string{
+				string([]byte{0x0}),
+				string([]byte{0x0, 0x0, 0x0}),
+			},
+			[]uint16{
+				1, 17,
+			},
+		},
+		{
+			string([]byte{0xf, 0xf, 0xf, 0xf, 0xf}),
+			[]string{
+				string([]byte{0xf}),
+				string([]byte{0xf, 0xf, 0xf}),
+			},
+			[]uint16{
+				16, 272,
+			},
+		},
+		// Length = 6
+		{
+			string([]byte{0x0, 0x0, 0x0, 0x0, 0x0, 0x0}),
+			[]string{
+				string([]byte{0x0}),
+				string([]byte{0x0, 0x0, 0x0}),
+				string([]byte{0x0, 0x0, 0x0, 0x0, 0x0, 0x0}),
+			},
+			[]uint16{
+				1, 17, 0,
+			},
+		},
+		{
+			string([]byte{0xf, 0xf, 0xf, 0xf, 0xf, 0xf}),
+			[]string{
+				string([]byte{0xf}),
+				string([]byte{0xf, 0xf, 0xf}),
+				string([]byte{0xf, 0xf, 0xf, 0xf, 0xf, 0xf}),
+			},
+			[]uint16{
+				16, 272, 0,
+			},
+		},
+	}
+	for _, suite := range suites {
+		prefix, id := accountIndexScheme.splitPath(suite.input)
+		if !reflect.DeepEqual(prefix, suite.expPrefix) {
+			t.Fatalf("Unexpected prefix for %v: got %v, want %v", suite.input, prefix, suite.expPrefix)
+		}
+		if !reflect.DeepEqual(id, suite.expID) {
+			t.Fatalf("Unexpected ID for %v: got %v, want %v", suite.input, id, suite.expID)
+		}
+	}
+}
+
+func TestSplitStoragePath(t *testing.T) {
+	t.Parallel()
+
+	var suites = []struct {
+		input     string
+		expPrefix []string
+		expID     []uint16
+	}{
+		// Length = 0
+		{
+			"",
+			[]string{
+				string([]byte{}),
+			},
+			[]uint16{
+				0,
+			},
+		},
+		// Length = 1
+		{
+			string([]byte{0x0}),
+			[]string{
+				string([]byte{}),
+			},
+			[]uint16{
+				1,
+			},
+		},
+		{
+			string([]byte{0x1}),
+			[]string{
+				string([]byte{}),
+			},
+			[]uint16{
+				2,
+			},
+		},
+		{
+			string([]byte{0xf}),
+			[]string{
+				string([]byte{}),
+			},
+			[]uint16{
+				16,
+			},
+		},
+		// Length = 2
+		{
+			string([]byte{0x0, 0x0}),
+			[]string{
+				string([]byte{}),
+			},
+			[]uint16{
+				17,
+			},
+		},
+		{
+			string([]byte{0x0, 0x1}),
+			[]string{
+				string([]byte{}),
+			},
+			[]uint16{
+				18,
+			},
+		},
+		{
+			string([]byte{0x0, 0xf}),
+			[]string{
+				string([]byte{}),
+			},
+			[]uint16{
+				32,
+			},
+		},
+		{
+			string([]byte{0xf, 0xf}),
+			[]string{
+				string([]byte{}),
+			},
+			[]uint16{
+				272,
+			},
+		},
+		// Length = 3
+		{
+			string([]byte{0x0, 0x0, 0x0}),
+			[]string{
+				string([]byte{}),
+				string([]byte{0x0, 0x0, 0x0}),
+			},
+			[]uint16{
+				17, 0,
+			},
+		},
+		// Length = 3
+		{
+			string([]byte{0xf, 0xf, 0xf}),
+			[]string{
+				string([]byte{}),
+				string([]byte{0xf, 0xf, 0xf}),
+			},
+			[]uint16{
+				272, 0,
+			},
+		},
+		// Length = 4
+		{
+			string([]byte{0x0, 0x0, 0x0, 0x0}),
+			[]string{
+				string([]byte{}),
+				string([]byte{0x0, 0x0, 0x0}),
+			},
+			[]uint16{
+				17, 1,
+			},
+		},
+		{
+			string([]byte{0xf, 0xf, 0xf, 0xf}),
+			[]string{
+				string([]byte{}),
+				string([]byte{0xf, 0xf, 0xf}),
+			},
+			[]uint16{
+				272, 16,
+			},
+		},
+		// Length = 5
+		{
+			string([]byte{0x0, 0x0, 0x0, 0x0, 0x0}),
+			[]string{
+				string([]byte{}),
+				string([]byte{0x0, 0x0, 0x0}),
+			},
+			[]uint16{
+				17, 17,
+			},
+		},
+		{
+			string([]byte{0xf, 0xf, 0xf, 0xf, 0xf}),
+			[]string{
+				string([]byte{}),
+				string([]byte{0xf, 0xf, 0xf}),
+			},
+			[]uint16{
+				272, 272,
+			},
+		},
+		// Length = 6
+		{
+			string([]byte{0x0, 0x0, 0x0, 0x0, 0x0, 0x0}),
+			[]string{
+				string([]byte{}),
+				string([]byte{0x0, 0x0, 0x0}),
+				string([]byte{0x0, 0x0, 0x0, 0x0, 0x0, 0x0}),
+			},
+			[]uint16{
+				17, 17, 0,
+			},
+		},
+		{
+			string([]byte{0xf, 0xf, 0xf, 0xf, 0xf, 0xf}),
+			[]string{
+				string([]byte{}),
+				string([]byte{0xf, 0xf, 0xf}),
+				string([]byte{0xf, 0xf, 0xf, 0xf, 0xf, 0xf}),
+			},
+			[]uint16{
+				272, 272, 0,
+			},
+		},
+	}
+	for i, suite := range suites {
+		prefix, id := storageIndexScheme.splitPath(suite.input)
+		if !reflect.DeepEqual(prefix, suite.expPrefix) {
+			t.Fatalf("Test %d, unexpected prefix for %v: got %v, want %v", i, suite.input, prefix, suite.expPrefix)
+		}
+		if !reflect.DeepEqual(id, suite.expID) {
+			t.Fatalf("Test %d, unexpected ID for %v: got %v, want %v", i, suite.input, id, suite.expID)
+		}
+	}
+}
+
 func TestIsAncestor(t *testing.T) {
 	suites := []struct {
 		x, y uint16
diff --git a/triedb/pathdb/reader.go b/triedb/pathdb/reader.go
index 842ac0972e..c76d88b594 100644
--- a/triedb/pathdb/reader.go
+++ b/triedb/pathdb/reader.go
@@ -200,7 +200,7 @@ func (db *Database) StateReader(root common.Hash) (database.StateReader, error)
 // historical state.
 type HistoricalStateReader struct {
 	db     *Database
-	reader *historyReader
+	reader *stateHistoryReader
 	id     uint64
 }
 
@@ -234,7 +234,7 @@ func (db *Database) HistoricReader(root common.Hash) (*HistoricalStateReader, er
 	return &HistoricalStateReader{
 		id:     *id,
 		db:     db,
-		reader: newHistoryReader(db.diskdb, db.stateFreezer),
+		reader: newStateHistoryReader(db.diskdb, db.stateFreezer),
 	}, nil
 }