From 588dd94aadca36d8a55a44457ff31dd480073a97 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Sat, 17 Jan 2026 20:28:37 +0800 Subject: [PATCH] triedb/pathdb: implement trienode history indexing scheme (#33551) This PR implements the indexing scheme for trie node history. Check https://github.com/ethereum/go-ethereum/pull/33399 for more details --- cmd/keeper/go.mod | 1 + triedb/pathdb/database_test.go | 4 +- triedb/pathdb/history.go | 79 +++- triedb/pathdb/history_indexer.go | 34 +- triedb/pathdb/history_reader.go | 75 +-- triedb/pathdb/history_reader_test.go | 4 +- triedb/pathdb/history_state.go | 8 +- triedb/pathdb/history_trienode.go | 32 +- triedb/pathdb/history_trienode_test.go | 46 -- triedb/pathdb/history_trienode_utils.go | 238 ++++++++++ triedb/pathdb/history_trienode_utils_test.go | 458 +++++++++++++++++++ triedb/pathdb/reader.go | 4 +- 12 files changed, 870 insertions(+), 113 deletions(-) diff --git a/cmd/keeper/go.mod b/cmd/keeper/go.mod index cee1ce05a7..21cdfe8c33 100644 --- a/cmd/keeper/go.mod +++ b/cmd/keeper/go.mod @@ -33,6 +33,7 @@ require ( github.com/tklauser/go-sysconf v0.3.12 // indirect github.com/tklauser/numcpus v0.6.1 // indirect golang.org/x/crypto v0.36.0 // indirect + golang.org/x/exp v0.0.0-20230626212559-97b1e661b5df // indirect golang.org/x/sync v0.12.0 // indirect golang.org/x/sys v0.39.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect diff --git a/triedb/pathdb/database_test.go b/triedb/pathdb/database_test.go index 8cca7b1b3c..2d1819d08f 100644 --- a/triedb/pathdb/database_test.go +++ b/triedb/pathdb/database_test.go @@ -950,7 +950,7 @@ func TestDatabaseIndexRecovery(t *testing.T) { var ( dIndex int roots = env.roots - hr = newHistoryReader(env.db.diskdb, env.db.stateFreezer) + hr = newStateHistoryReader(env.db.diskdb, env.db.stateFreezer) ) for i, root := range roots { if root == dRoot { @@ -1011,7 +1011,7 @@ func TestDatabaseIndexRecovery(t *testing.T) { // Ensure the truncated state histories become accessible bRoot = 
env.db.tree.bottom().rootHash() - hr = newHistoryReader(env.db.diskdb, env.db.stateFreezer) + hr = newStateHistoryReader(env.db.diskdb, env.db.stateFreezer) for i, root := range roots { if root == bRoot { break diff --git a/triedb/pathdb/history.go b/triedb/pathdb/history.go index d78999f218..86224ea5b2 100644 --- a/triedb/pathdb/history.go +++ b/triedb/pathdb/history.go @@ -121,6 +121,20 @@ func (ident stateIdent) String() string { return ident.addressHash.Hex() + ident.path } +func (ident stateIdent) bloomSize() int { + if ident.typ == typeAccount { + return 0 + } + if ident.typ == typeStorage { + return 0 + } + scheme := accountIndexScheme + if ident.addressHash != (common.Hash{}) { + scheme = storageIndexScheme + } + return scheme.getBitmapSize(len(ident.path)) +} + // newAccountIdent constructs a state identifier for an account. func newAccountIdent(addressHash common.Hash) stateIdent { return stateIdent{ @@ -143,6 +157,8 @@ func newStorageIdent(addressHash common.Hash, storageHash common.Hash) stateIden // newTrienodeIdent constructs a state identifier for a trie node. // The address denotes the address hash of the associated account; // the path denotes the path of the node within the trie; +// +// nolint:unused func newTrienodeIdent(addressHash common.Hash, path string) stateIdent { return stateIdent{ typ: typeTrienode, @@ -180,17 +196,62 @@ func newStorageIdentQuery(address common.Address, addressHash common.Hash, stora } } -// newTrienodeIdentQuery constructs a state identifier for a trie node. -// the addressHash denotes the address hash of the associated account; -// the path denotes the path of the node within the trie; -// -// nolint:unused -func newTrienodeIdentQuery(addrHash common.Hash, path []byte) stateIdentQuery { - return stateIdentQuery{ - stateIdent: newTrienodeIdent(addrHash, string(path)), +// indexElem defines the element for indexing. 
+type indexElem interface { + key() stateIdent + ext() []uint16 +} + +type accountIndexElem struct { + addressHash common.Hash +} + +func (a accountIndexElem) key() stateIdent { + return stateIdent{ + typ: typeAccount, + addressHash: a.addressHash, } } +func (a accountIndexElem) ext() []uint16 { + return nil +} + +type storageIndexElem struct { + addressHash common.Hash + storageHash common.Hash +} + +func (a storageIndexElem) key() stateIdent { + return stateIdent{ + typ: typeStorage, + addressHash: a.addressHash, + storageHash: a.storageHash, + } +} + +func (a storageIndexElem) ext() []uint16 { + return nil +} + +type trienodeIndexElem struct { + owner common.Hash + path string + data []uint16 +} + +func (a trienodeIndexElem) key() stateIdent { + return stateIdent{ + typ: typeTrienode, + addressHash: a.owner, + path: a.path, + } +} + +func (a trienodeIndexElem) ext() []uint16 { + return a.data +} + // history defines the interface of historical data, shared by stateHistory // and trienodeHistory. type history interface { @@ -198,7 +259,7 @@ type history interface { typ() historyType // forEach returns an iterator to traverse the state entries in the history. - forEach() iter.Seq[stateIdent] + forEach() iter.Seq[indexElem] } var ( diff --git a/triedb/pathdb/history_indexer.go b/triedb/pathdb/history_indexer.go index ddb4a293cc..18d71f6dae 100644 --- a/triedb/pathdb/history_indexer.go +++ b/triedb/pathdb/history_indexer.go @@ -29,6 +29,7 @@ import ( "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" + "golang.org/x/exp/maps" "golang.org/x/sync/errgroup" ) @@ -121,18 +122,20 @@ func deleteIndexMetadata(db ethdb.KeyValueWriter, typ historyType) { // batchIndexer is responsible for performing batch indexing or unindexing // of historical data (e.g., state or trie node changes) atomically. 
type batchIndexer struct { - index map[stateIdent][]uint64 // List of history IDs for tracked state entry - pending int // Number of entries processed in the current batch. - delete bool // Operation mode: true for unindex, false for index. - lastID uint64 // ID of the most recently processed history. - typ historyType // Type of history being processed (e.g., state or trienode). - db ethdb.KeyValueStore // Key-value database used to store or delete index data. + index map[stateIdent][]uint64 // List of history IDs for tracked state entry + ext map[stateIdent][][]uint16 // List of extension for each state element + pending int // Number of entries processed in the current batch. + delete bool // Operation mode: true for unindex, false for index. + lastID uint64 // ID of the most recently processed history. + typ historyType // Type of history being processed (e.g., state or trienode). + db ethdb.KeyValueStore // Key-value database used to store or delete index data. } // newBatchIndexer constructs the batch indexer with the supplied mode. func newBatchIndexer(db ethdb.KeyValueStore, delete bool, typ historyType) *batchIndexer { return &batchIndexer{ index: make(map[stateIdent][]uint64), + ext: make(map[stateIdent][][]uint16), delete: delete, typ: typ, db: db, @@ -142,8 +145,10 @@ func newBatchIndexer(db ethdb.KeyValueStore, delete bool, typ historyType) *batc // process traverses the state entries within the provided history and tracks the mutation // records for them. 
func (b *batchIndexer) process(h history, id uint64) error { - for ident := range h.forEach() { - b.index[ident] = append(b.index[ident], id) + for elem := range h.forEach() { + key := elem.key() + b.index[key] = append(b.index[key], id) + b.ext[key] = append(b.ext[key], elem.ext()) b.pending++ } b.lastID = id @@ -190,14 +195,15 @@ func (b *batchIndexer) finish(force bool) error { indexed = metadata.Last } for ident, list := range b.index { + ext := b.ext[ident] eg.Go(func() error { if !b.delete { - iw, err := newIndexWriter(b.db, ident, indexed, 0) + iw, err := newIndexWriter(b.db, ident, indexed, ident.bloomSize()) if err != nil { return err } - for _, n := range list { - if err := iw.append(n, nil); err != nil { + for i, n := range list { + if err := iw.append(n, ext[i]); err != nil { return err } } @@ -205,7 +211,7 @@ func (b *batchIndexer) finish(force bool) error { iw.finish(batch) }) } else { - id, err := newIndexDeleter(b.db, ident, indexed, 0) + id, err := newIndexDeleter(b.db, ident, indexed, ident.bloomSize()) if err != nil { return err } @@ -239,8 +245,10 @@ func (b *batchIndexer) finish(force bool) error { return err } log.Debug("Committed batch indexer", "type", b.typ, "entries", len(b.index), "records", b.pending, "size", common.StorageSize(batchSize), "elapsed", common.PrettyDuration(time.Since(start))) + b.pending = 0 - b.index = make(map[stateIdent][]uint64) + maps.Clear(b.index) + maps.Clear(b.ext) return nil } diff --git a/triedb/pathdb/history_reader.go b/triedb/pathdb/history_reader.go index 69e7d5bd22..04cd869d2b 100644 --- a/triedb/pathdb/history_reader.go +++ b/triedb/pathdb/history_reader.go @@ -99,16 +99,17 @@ func (r *indexReaderWithLimitTag) readGreaterThan(id uint64, lastID uint64) (uin return r.reader.readGreaterThan(id) } -// historyReader is the structure to access historic state data. -type historyReader struct { +// stateHistoryReader is the structure to access historic state data. 
+type stateHistoryReader struct { disk ethdb.KeyValueReader freezer ethdb.AncientReader readers map[string]*indexReaderWithLimitTag } -// newHistoryReader constructs the history reader with the supplied db. -func newHistoryReader(disk ethdb.KeyValueReader, freezer ethdb.AncientReader) *historyReader { - return &historyReader{ +// newStateHistoryReader constructs the history reader with the supplied db +// for accessing historical states. +func newStateHistoryReader(disk ethdb.KeyValueReader, freezer ethdb.AncientReader) *stateHistoryReader { + return &stateHistoryReader{ disk: disk, freezer: freezer, readers: make(map[string]*indexReaderWithLimitTag), @@ -117,7 +118,7 @@ func newHistoryReader(disk ethdb.KeyValueReader, freezer ethdb.AncientReader) *h // readAccountMetadata resolves the account metadata within the specified // state history. -func (r *historyReader) readAccountMetadata(address common.Address, historyID uint64) ([]byte, error) { +func (r *stateHistoryReader) readAccountMetadata(address common.Address, historyID uint64) ([]byte, error) { blob := rawdb.ReadStateAccountIndex(r.freezer, historyID) if len(blob) == 0 { return nil, fmt.Errorf("account index is truncated, historyID: %d", historyID) @@ -143,7 +144,7 @@ func (r *historyReader) readAccountMetadata(address common.Address, historyID ui // readStorageMetadata resolves the storage slot metadata within the specified // state history. 
-func (r *historyReader) readStorageMetadata(storageKey common.Hash, storageHash common.Hash, historyID uint64, slotOffset, slotNumber int) ([]byte, error) { +func (r *stateHistoryReader) readStorageMetadata(storageKey common.Hash, storageHash common.Hash, historyID uint64, slotOffset, slotNumber int) ([]byte, error) { data, err := rawdb.ReadStateStorageIndex(r.freezer, historyID, slotIndexSize*slotOffset, slotIndexSize*slotNumber) if err != nil { msg := fmt.Sprintf("id: %d, slot-offset: %d, slot-length: %d", historyID, slotOffset, slotNumber) @@ -178,7 +179,7 @@ func (r *historyReader) readStorageMetadata(storageKey common.Hash, storageHash } // readAccount retrieves the account data from the specified state history. -func (r *historyReader) readAccount(address common.Address, historyID uint64) ([]byte, error) { +func (r *stateHistoryReader) readAccount(address common.Address, historyID uint64) ([]byte, error) { metadata, err := r.readAccountMetadata(address, historyID) if err != nil { return nil, err @@ -194,7 +195,7 @@ func (r *historyReader) readAccount(address common.Address, historyID uint64) ([ } // readStorage retrieves the storage slot data from the specified state history. 
-func (r *historyReader) readStorage(address common.Address, storageKey common.Hash, storageHash common.Hash, historyID uint64) ([]byte, error) { +func (r *stateHistoryReader) readStorage(address common.Address, storageKey common.Hash, storageHash common.Hash, historyID uint64) ([]byte, error) { metadata, err := r.readAccountMetadata(address, historyID) if err != nil { return nil, err @@ -224,35 +225,16 @@ func (r *historyReader) readStorage(address common.Address, storageKey common.Ha // stateID: represents the ID of the state of the specified version; // lastID: represents the ID of the latest/newest state history; // latestValue: represents the state value at the current disk layer with ID == lastID; -func (r *historyReader) read(state stateIdentQuery, stateID uint64, lastID uint64, latestValue []byte) ([]byte, error) { - tail, err := r.freezer.Tail() +func (r *stateHistoryReader) read(state stateIdentQuery, stateID uint64, lastID uint64, latestValue []byte) ([]byte, error) { + lastIndexed, err := checkStateAvail(state.stateIdent, typeStateHistory, r.freezer, stateID, lastID, r.disk) if err != nil { return nil, err - } // firstID = tail+1 - - // stateID+1 == firstID is allowed, as all the subsequent state histories - // are present with no gap inside. - if stateID < tail { - return nil, fmt.Errorf("historical state has been pruned, first: %d, state: %d", tail+1, stateID) } - - // To serve the request, all state histories from stateID+1 to lastID - // must be indexed. It's not supposed to happen unless system is very - // wrong. 
- metadata := loadIndexMetadata(r.disk, toHistoryType(state.typ)) - if metadata == nil || metadata.Last < lastID { - indexed := "null" - if metadata != nil { - indexed = fmt.Sprintf("%d", metadata.Last) - } - return nil, fmt.Errorf("state history is not fully indexed, requested: %d, indexed: %s", stateID, indexed) - } - // Construct the index reader to locate the corresponding history for // state retrieval ir, ok := r.readers[state.String()] if !ok { - ir, err = newIndexReaderWithLimitTag(r.disk, state.stateIdent, metadata.Last, 0) + ir, err = newIndexReaderWithLimitTag(r.disk, state.stateIdent, lastIndexed, 0) if err != nil { return nil, err } @@ -277,3 +259,34 @@ func (r *historyReader) read(state stateIdentQuery, stateID uint64, lastID uint6 } return r.readStorage(state.address, state.storageKey, state.storageHash, historyID) } + +// checkStateAvail determines whether the requested historical state is available +// for accessing. What's more, it also returns the ID of the latest indexed history +// entry for subsequent usage. +func checkStateAvail(state stateIdent, exptyp historyType, freezer ethdb.AncientReader, stateID uint64, lastID uint64, db ethdb.KeyValueReader) (uint64, error) { + if toHistoryType(state.typ) != exptyp { + return 0, fmt.Errorf("unsupported history type: %d, want: %v", toHistoryType(state.typ), exptyp) + } + // firstID = tail+1 + tail, err := freezer.Tail() + if err != nil { + return 0, err + } + // stateID+1 == firstID is allowed, as all the subsequent history entries + // are present with no gap inside. + if stateID < tail { + return 0, fmt.Errorf("historical state has been pruned, first: %d, state: %d", tail+1, stateID) + } + // To serve the request, all history entries from stateID+1 to lastID + // must be indexed. It's not supposed to happen unless system is very + // wrong. 
+ metadata := loadIndexMetadata(db, exptyp) + if metadata == nil || metadata.Last < lastID { + indexed := "null" + if metadata != nil { + indexed = fmt.Sprintf("%d", metadata.Last) + } + return 0, fmt.Errorf("history is not fully indexed, requested: %d, indexed: %s", stateID, indexed) + } + return metadata.Last, nil +} diff --git a/triedb/pathdb/history_reader_test.go b/triedb/pathdb/history_reader_test.go index 3e1a545ff3..b69fba68cb 100644 --- a/triedb/pathdb/history_reader_test.go +++ b/triedb/pathdb/history_reader_test.go @@ -50,7 +50,7 @@ func stateAvail(id uint64, env *tester) bool { return id+1 >= firstID } -func checkHistoricalState(env *tester, root common.Hash, id uint64, hr *historyReader) error { +func checkHistoricalState(env *tester, root common.Hash, id uint64, hr *stateHistoryReader) error { if !stateAvail(id, env) { return nil } @@ -157,7 +157,7 @@ func testHistoryReader(t *testing.T, historyLimit uint64) { var ( roots = env.roots dl = env.db.tree.bottom() - hr = newHistoryReader(env.db.diskdb, env.db.stateFreezer) + hr = newStateHistoryReader(env.db.diskdb, env.db.stateFreezer) ) for i, root := range roots { if root == dl.rootHash() { diff --git a/triedb/pathdb/history_state.go b/triedb/pathdb/history_state.go index bc21915dba..23428b1a54 100644 --- a/triedb/pathdb/history_state.go +++ b/triedb/pathdb/history_state.go @@ -283,11 +283,11 @@ func (h *stateHistory) typ() historyType { // forEach implements the history interface, returning an iterator to traverse the // state entries in the history. 
-func (h *stateHistory) forEach() iter.Seq[stateIdent] { - return func(yield func(stateIdent) bool) { +func (h *stateHistory) forEach() iter.Seq[indexElem] { + return func(yield func(indexElem) bool) { for _, addr := range h.accountList { addrHash := crypto.Keccak256Hash(addr.Bytes()) - if !yield(newAccountIdent(addrHash)) { + if !yield(accountIndexElem{addrHash}) { return } for _, slotKey := range h.storageList[addr] { @@ -298,7 +298,7 @@ func (h *stateHistory) forEach() iter.Seq[stateIdent] { if h.meta.version != stateHistoryV0 { slotHash = crypto.Keccak256Hash(slotKey.Bytes()) } - if !yield(newStorageIdent(addrHash, slotHash)) { + if !yield(storageIndexElem{addrHash, slotHash}) { return } } diff --git a/triedb/pathdb/history_trienode.go b/triedb/pathdb/history_trienode.go index 6c0c0fe8cc..67be9de491 100644 --- a/triedb/pathdb/history_trienode.go +++ b/triedb/pathdb/history_trienode.go @@ -166,11 +166,35 @@ func (h *trienodeHistory) typ() historyType { // forEach implements the history interface, returning an iterator to traverse the // state entries in the history. 
-func (h *trienodeHistory) forEach() iter.Seq[stateIdent] { - return func(yield func(stateIdent) bool) { +func (h *trienodeHistory) forEach() iter.Seq[indexElem] { + return func(yield func(indexElem) bool) { for _, owner := range h.owners { - for _, path := range h.nodeList[owner] { - if !yield(newTrienodeIdent(owner, path)) { + var ( + scheme *indexScheme + paths = h.nodeList[owner] + indexes = make(map[string]map[uint16]struct{}) + ) + if owner == (common.Hash{}) { + scheme = accountIndexScheme + } else { + scheme = storageIndexScheme + } + for _, leaf := range findLeafPaths(paths) { + chunks, ids := scheme.splitPath(leaf) + for i := 0; i < len(chunks); i++ { + if _, exists := indexes[chunks[i]]; !exists { + indexes[chunks[i]] = make(map[uint16]struct{}) + } + indexes[chunks[i]][ids[i]] = struct{}{} + } + } + for chunk, ids := range indexes { + elem := trienodeIndexElem{ + owner: owner, + path: chunk, + data: slices.Collect(maps.Keys(ids)), + } + if !yield(elem) { return } } diff --git a/triedb/pathdb/history_trienode_test.go b/triedb/pathdb/history_trienode_test.go index 0c0422f00f..8f9b9c2600 100644 --- a/triedb/pathdb/history_trienode_test.go +++ b/triedb/pathdb/history_trienode_test.go @@ -534,52 +534,6 @@ func TestTrienodeHistoryReaderNilKey(t *testing.T) { } } -// TestTrienodeHistoryReaderIterator tests the iterator functionality -func TestTrienodeHistoryReaderIterator(t *testing.T) { - h := makeTrienodeHistory() - - // Count expected entries - expectedCount := 0 - expectedNodes := make(map[stateIdent]bool) - for owner, nodeList := range h.nodeList { - expectedCount += len(nodeList) - for _, node := range nodeList { - expectedNodes[stateIdent{ - typ: typeTrienode, - addressHash: owner, - path: node, - }] = true - } - } - - // Test the iterator - actualCount := 0 - for x := range h.forEach() { - _ = x - actualCount++ - } - if actualCount != expectedCount { - t.Fatalf("Iterator count mismatch: expected %d, got %d", expectedCount, actualCount) - } - - // Test 
that iterator yields expected state identifiers - seen := make(map[stateIdent]bool) - for ident := range h.forEach() { - if ident.typ != typeTrienode { - t.Fatal("Iterator should only yield trienode history identifiers") - } - key := stateIdent{typ: ident.typ, addressHash: ident.addressHash, path: ident.path} - if seen[key] { - t.Fatal("Iterator yielded duplicate identifier") - } - seen[key] = true - - if !expectedNodes[key] { - t.Fatalf("Unexpected yielded identifier %v", key) - } - } -} - // TestCommonPrefixLen tests the commonPrefixLen helper function func TestCommonPrefixLen(t *testing.T) { tests := []struct { diff --git a/triedb/pathdb/history_trienode_utils.go b/triedb/pathdb/history_trienode_utils.go index 241b8a7d3c..11107494bb 100644 --- a/triedb/pathdb/history_trienode_utils.go +++ b/triedb/pathdb/history_trienode_utils.go @@ -21,6 +21,7 @@ import ( "fmt" "math/bits" "slices" + "strings" ) // commonPrefixLen returns the length of the common prefix shared by a and b. @@ -34,6 +35,243 @@ func commonPrefixLen(a, b []byte) int { return n } +// findLeafPaths scans a lexicographically sorted list of paths and returns +// the subset of paths that represent leaves. +// +// A path is considered a leaf if: +// - it is the last element in the list, or +// - the next path does not have the current path as its prefix. +// +// In other words, a leaf is a path that has no children extending it. +// +// Example: +// +// Input: ["a", "ab", "abc", "b", "ba"] +// Output: ["abc", "ba"] +// +// The input must be sorted; otherwise the result is undefined. +func findLeafPaths(paths []string) []string { + var leaves []string + for i := 0; i < len(paths); i++ { + if i == len(paths)-1 || !strings.HasPrefix(paths[i+1], paths[i]) { + leaves = append(leaves, paths[i]) + } + } + return leaves +} + +// hexPathNodeID computes a numeric node ID from the given path. 
The path is +// interpreted as a sequence of base-16 digits, where each byte of the input +// is treated as one hexadecimal digit in a big-endian number. +// +// The resulting node ID is constructed as: +// +// ID = 1 + 16 + 16^2 + ... + 16^(n-1) + value +// +// where n is the number of bytes in the path, and `value` is the base-16 +// interpretation of the byte sequence. +// +// The offset (1 + 16 + 16^2 + ... + 16^(n-1)) ensures that all IDs of shorter +// paths occupy a lower numeric range, preserving lexicographic ordering between +// paths of different lengths. +// +// The numeric node ID is represented as a uint16, on the assumption that the +// path length is never greater than 3. +func hexPathNodeID(path string) uint16 { + var ( + offset = uint16(0) + pow = uint16(1) + value = uint16(0) + bytes = []byte(path) + ) + for i := 0; i < len(bytes); i++ { + offset += pow + pow *= 16 + } + for i := 0; i < len(bytes); i++ { + value = value*16 + uint16(bytes[i]) + } + return offset + value +} + +// bitmapSize computes the number of bytes required for the marker bitmap +// corresponding to the remaining portion of a path after a cut point. +// The marker is a bitmap where each bit represents the presence of a +// possible element in the remaining path segment. +func bitmapSize(levels int) int { + // Compute: total = 1 + 16 + 16^2 + ... + 16^(levels-1) + var ( + bits = 0 + pow = 1 + ) + for i := 0; i < levels; i++ { + bits += pow + pow *= 16 + } + // A small adjustment is applied to exclude the root element of this path + // segment, since any existing element would already imply the mutation of + // the root element. This trick can save us 1 byte for each bitmap which is + // non-trivial. + bits -= 1 + return bits / 8 +} + +// indexScheme defines how trie nodes are split into chunks and indexed +// at chunk level. +// +// skipRoot indicates whether the root node should be excluded from indexing. 
+// cutPoints specifies the key length of chunks (in nibbles) extracted from +// each path. +type indexScheme struct { + // skipRoot indicates whether the root node should be excluded from indexing. + // In the account trie, the root is mutated on every state transition, so + // indexing it provides no value. + skipRoot bool + + // cutPoints defines the key lengths of chunks at different positions. + // A single trie node path may span multiple chunks vertically. + cutPoints []int + + // bitmaps specifies the required bitmap size for each chunk. The key is the + // chunk key length, and the value is the corresponding bitmap size. + bitmaps map[int]int +} + +var ( + // Account trie is split into chunks like this: + // + // - root node is excluded from indexing + // - nodes at level1 to level2 are grouped as 16 chunks + // - all other nodes are grouped 3 levels per chunk + // + // Level1 [0] ... [f] 16 chunks + // Level3 [000] ... [fff] 4096 chunks + // Level6 [000000] ... [ffffff] 16777216 chunks + // + // For the chunks at level1, there are 17 nodes per chunk. + // + // chunk-level 0 [ 0 ] 1 node + // chunk-level 1 [ 1 ] … [ 16 ] 16 nodes + // + // For the non-level1 chunks, there are 273 nodes per chunk, + // regardless of the chunk's depth in the trie. + // + // chunk-level 0 [ 0 ] 1 node + // chunk-level 1 [ 1 ] … [ 16 ] 16 nodes + // chunk-level 2 [ 17 ] … … [ 272 ] 256 nodes + accountIndexScheme = newIndexScheme(true) + + // Storage trie is split into chunks like this: (3 levels per chunk) + // + // Level0 [ ROOT ] 1 chunk + // Level3 [000] ... [fff] 4096 chunks + // Level6 [000000] ... [ffffff] 16777216 chunks + // + // Within each chunk, there are 273 nodes in total, regardless of + // the chunk's depth in the trie. + // + // chunk-level 0 [ 0 ] 1 node + // chunk-level 1 [ 1 ] … [ 16 ] 16 nodes + // chunk-level 2 [ 17 ] … … [ 272 ] 256 nodes + storageIndexScheme = newIndexScheme(false) +) + +// newIndexScheme initializes the index scheme. 
+func newIndexScheme(skipRoot bool) *indexScheme { + var ( + cuts []int + bitmaps = make(map[int]int) + ) + for v := 0; v <= 64; v += 3 { + var ( + levels int + length int + ) + if v == 0 && skipRoot { + length = 1 + levels = 2 + } else { + length = v + levels = 3 + } + cuts = append(cuts, length) + bitmaps[length] = bitmapSize(levels) + } + return &indexScheme{ + skipRoot: skipRoot, + cutPoints: cuts, + bitmaps: bitmaps, + } +} + +// getBitmapSize returns the bitmap size in bytes for the chunk with the given key length. +func (s *indexScheme) getBitmapSize(pathLen int) int { + return s.bitmaps[pathLen] +} + +// chunkSpan returns the number of chunks spanned by a path of the given length. +func (s *indexScheme) chunkSpan(length int) int { + var n int + for _, cut := range s.cutPoints { + if length >= cut { + n++ + continue + } + } + return n +} + +// splitPath applies the indexScheme to the given path and returns two lists: +// +// - chunkIDs: the progressively longer chunk IDs cut out by the scheme +// - innerIDs: the computed node ID for the path segment following each cut +// +// The scheme defines a set of cut points that partition the path. For each cut: +// +// - chunkIDs[i] is path[:cutPoints[i]] +// - innerIDs[i] is the node ID of the segment path[cutPoints[i] : nextCut-1] +func (s *indexScheme) splitPath(path string) ([]string, []uint16) { + // Special case: the root node of the account trie is mutated in every + // state transition, so its mutation records can be ignored. 
+ n := len(path) + if n == 0 && s.skipRoot { + return nil, nil + } + var ( + // Determine how many chunks are spanned by the path + chunks = s.chunkSpan(n) + chunkIDs = make([]string, 0, chunks) + nodeIDs = make([]uint16, 0, chunks) + ) + for i := 0; i < chunks; i++ { + position := s.cutPoints[i] + chunkIDs = append(chunkIDs, path[:position]) + + var limit int + if i != chunks-1 { + limit = s.cutPoints[i+1] - 1 + } else { + limit = len(path) + } + nodeIDs = append(nodeIDs, hexPathNodeID(path[position:limit])) + } + return chunkIDs, nodeIDs +} + +// splitPathLast returns the path prefix of the deepest chunk spanned by the +// given path, along with its corresponding internal node ID. If the path +// spans no chunks, it returns an empty prefix and 0. +// +// nolint:unused +func (s *indexScheme) splitPathLast(path string) (string, uint16) { + chunkIDs, nodeIDs := s.splitPath(path) + if len(chunkIDs) == 0 { + return "", 0 + } + n := len(chunkIDs) + return chunkIDs[n-1], nodeIDs[n-1] +} + // encodeIDs sorts the given list of uint16 IDs and encodes them into a // compact byte slice using variable-length unsigned integer encoding. 
func encodeIDs(ids []uint16) []byte { diff --git a/triedb/pathdb/history_trienode_utils_test.go b/triedb/pathdb/history_trienode_utils_test.go index c3bd0d5b1f..32bd91166d 100644 --- a/triedb/pathdb/history_trienode_utils_test.go +++ b/triedb/pathdb/history_trienode_utils_test.go @@ -22,6 +22,464 @@ import ( "testing" )

// TestHexPathNodeID checks the node IDs computed for hex paths of length
// zero to three against known values.
func TestHexPathNodeID(t *testing.T) {
	t.Parallel()

	cases := []struct {
		input string
		exp   uint16
	}{
		{"", 0},
		{"\x00", 1},
		{"\x0f", 16},
		{"\x00\x00", 17},
		{"\x00\x0f", 32},
		{"\x01\x00", 33},
		{"\x01\x0f", 48},
		{"\x0f\x0f", 272},
		{"\x0f\x0f\x0f", 4368},
	}
	for _, tc := range cases {
		if got := hexPathNodeID(tc.input); got != tc.exp {
			t.Fatalf("Unexpected node ID for %v: got %d, want %d", tc.input, got, tc.exp)
		}
	}
}

// TestFindLeafPaths checks that findLeafPaths only keeps the paths which are
// not a prefix of another path in the input.
func TestFindLeafPaths(t *testing.T) {
	t.Parallel()

	cases := []struct {
		input  []string
		expect []string
	}{
		{nil, nil},
		{[]string{"a"}, []string{"a"}},
		{
			[]string{"", "0", "00", "01", "1"},
			[]string{"00", "01", "1"},
		},
		{
			[]string{"10", "100", "11", "2"},
			[]string{"100", "11", "2"},
		},
		{
			[]string{"10", "100000000", "11", "111111111", "2"},
			[]string{"100000000", "111111111", "2"},
		},
	}
	for _, tc := range cases {
		if res := findLeafPaths(tc.input); !reflect.DeepEqual(res, tc.expect) {
			t.Fatalf("Unexpected result: %v, expected %v", res, tc.expect)
		}
	}
}

// TestSplitAccountPath checks the chunk prefixes and node IDs produced by the
// account index scheme for paths of length zero to six.
func TestSplitAccountPath(t *testing.T) {
	t.Parallel()

	// p assembles a path from the given elements.
	p := func(elems ...byte) string { return string(elems) }

	cases := []struct {
		input     string
		expPrefix []string
		expID     []uint16
	}{
		// Length = 0
		{"", nil, nil},
		// Length = 1
		{p(0x0), []string{p(0x0)}, []uint16{0}},
		{p(0x1), []string{p(0x1)}, []uint16{0}},
		{p(0xf), []string{p(0xf)}, []uint16{0}},
		// Length = 2
		{p(0x0, 0x0), []string{p(0x0)}, []uint16{1}},
		{p(0x0, 0x1), []string{p(0x0)}, []uint16{2}},
		{p(0x0, 0xf), []string{p(0x0)}, []uint16{16}},
		{p(0xf, 0xf), []string{p(0xf)}, []uint16{16}},
		// Length = 3
		{
			p(0x0, 0x0, 0x0),
			[]string{p(0x0), p(0x0, 0x0, 0x0)},
			[]uint16{1, 0},
		},
		{
			p(0xf, 0xf, 0xf),
			[]string{p(0xf), p(0xf, 0xf, 0xf)},
			[]uint16{16, 0},
		},
		// Length = 4
		{
			p(0x0, 0x0, 0x0, 0x0),
			[]string{p(0x0), p(0x0, 0x0, 0x0)},
			[]uint16{1, 1},
		},
		{
			p(0xf, 0xf, 0xf, 0xf),
			[]string{p(0xf), p(0xf, 0xf, 0xf)},
			[]uint16{16, 16},
		},
		// Length = 5
		{
			p(0x0, 0x0, 0x0, 0x0, 0x0),
			[]string{p(0x0), p(0x0, 0x0, 0x0)},
			[]uint16{1, 17},
		},
		{
			p(0xf, 0xf, 0xf, 0xf, 0xf),
			[]string{p(0xf), p(0xf, 0xf, 0xf)},
			[]uint16{16, 272},
		},
		// Length = 6
		{
			p(0x0, 0x0, 0x0, 0x0, 0x0, 0x0),
			[]string{p(0x0), p(0x0, 0x0, 0x0), p(0x0, 0x0, 0x0, 0x0, 0x0, 0x0)},
			[]uint16{1, 17, 0},
		},
		{
			p(0xf, 0xf, 0xf, 0xf, 0xf, 0xf),
			[]string{p(0xf), p(0xf, 0xf, 0xf), p(0xf, 0xf, 0xf, 0xf, 0xf, 0xf)},
			[]uint16{16, 272, 0},
		},
	}
	for _, tc := range cases {
		prefix, id := accountIndexScheme.splitPath(tc.input)
		if !reflect.DeepEqual(prefix, tc.expPrefix) {
			t.Fatalf("Unexpected prefix for %v: got %v, want %v", tc.input, prefix, tc.expPrefix)
		}
		if !reflect.DeepEqual(id, tc.expID) {
			t.Fatalf("Unexpected ID for %v: got %v, want %v", tc.input, id, tc.expID)
		}
	}
}

// TestSplitStoragePath checks the chunk prefixes and node IDs produced by the
// storage index scheme for paths of length zero to six.
func TestSplitStoragePath(t *testing.T) {
	t.Parallel()

	// p assembles a path from the given elements.
	p := func(elems ...byte) string { return string(elems) }

	cases := []struct {
		input     string
		expPrefix []string
		expID     []uint16
	}{
		// Length = 0
		{"", []string{p()}, []uint16{0}},
		// Length = 1
		{p(0x0), []string{p()}, []uint16{1}},
		{p(0x1), []string{p()}, []uint16{2}},
		{p(0xf), []string{p()}, []uint16{16}},
		// Length = 2
		{p(0x0, 0x0), []string{p()}, []uint16{17}},
		{p(0x0, 0x1), []string{p()}, []uint16{18}},
		{p(0x0, 0xf), []string{p()}, []uint16{32}},
		{p(0xf, 0xf), []string{p()}, []uint16{272}},
		// Length = 3
		{
			p(0x0, 0x0, 0x0),
			[]string{p(), p(0x0, 0x0, 0x0)},
			[]uint16{17, 0},
		},
		{
			p(0xf, 0xf, 0xf),
			[]string{p(), p(0xf, 0xf, 0xf)},
			[]uint16{272, 0},
		},
		// Length = 4
		{
			p(0x0, 0x0, 0x0, 0x0),
			[]string{p(), p(0x0, 0x0, 0x0)},
			[]uint16{17, 1},
		},
		{
			p(0xf, 0xf, 0xf, 0xf),
			[]string{p(), p(0xf, 0xf, 0xf)},
			[]uint16{272, 16},
		},
		// Length = 5
		{
			p(0x0, 0x0, 0x0, 0x0, 0x0),
			[]string{p(), p(0x0, 0x0, 0x0)},
			[]uint16{17, 17},
		},
		{
			p(0xf, 0xf, 0xf, 0xf, 0xf),
			[]string{p(), p(0xf, 0xf, 0xf)},
			[]uint16{272, 272},
		},
		// Length = 6
		{
			p(0x0, 0x0, 0x0, 0x0, 0x0, 0x0),
			[]string{p(), p(0x0, 0x0, 0x0), p(0x0, 0x0, 0x0, 0x0, 0x0, 0x0)},
			[]uint16{17, 17, 0},
		},
		{
			p(0xf, 0xf, 0xf, 0xf, 0xf, 0xf),
			[]string{p(), p(0xf, 0xf, 0xf), p(0xf, 0xf, 0xf, 0xf, 0xf, 0xf)},
			[]uint16{272, 272, 0},
		},
	}
	for i, tc := range cases {
		prefix, id := storageIndexScheme.splitPath(tc.input)
		if !reflect.DeepEqual(prefix, tc.expPrefix) {
			t.Fatalf("Test %d, unexpected prefix for %v: got %v, want %v", i, tc.input, prefix, tc.expPrefix)
		}
		if !reflect.DeepEqual(id, tc.expID) {
			t.Fatalf("Test %d, unexpected ID for %v: got %v, want %v", i, tc.input, id, tc.expID)
		}
	}
}

func TestIsAncestor(t *testing.T) { suites := []struct { x, y uint16 diff --git a/triedb/pathdb/reader.go b/triedb/pathdb/reader.go index 842ac0972e..c76d88b594 100644 --- a/triedb/pathdb/reader.go +++ b/triedb/pathdb/reader.go @@ -200,7 +200,7 @@ func (db *Database) StateReader(root common.Hash) (database.StateReader, error) // historical state. type HistoricalStateReader struct { db *Database - reader *historyReader + reader *stateHistoryReader id uint64 } @@ -234,7 +234,7 @@ func (db *Database) HistoricReader(root common.Hash) (*HistoricalStateReader, er return &HistoricalStateReader{ id: *id, db: db, - reader: newHistoryReader(db.diskdb, db.stateFreezer), + reader: newStateHistoryReader(db.diskdb, db.stateFreezer), }, nil }