From 960c87a9442d566132a9d233e3fbc6dfa5aa4372 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Wed, 26 Nov 2025 16:07:16 +0800 Subject: [PATCH] triedb/pathdb: implement iterator of history index (#32981) This change introduces an iterator for the history index in the pathdb. It provides sequential access to historical entries, enabling efficient scanning and future features built on top of historical state traversal. --- triedb/pathdb/history_index.go | 43 +-- triedb/pathdb/history_index_block.go | 54 +-- triedb/pathdb/history_index_iterator.go | 359 +++++++++++++++++++ triedb/pathdb/history_index_iterator_test.go | 297 +++++++++++++++ 4 files changed, 684 insertions(+), 69 deletions(-) create mode 100644 triedb/pathdb/history_index_iterator.go create mode 100644 triedb/pathdb/history_index_iterator_test.go diff --git a/triedb/pathdb/history_index.go b/triedb/pathdb/history_index.go index 5b4c91d7e6..87b6e377af 100644 --- a/triedb/pathdb/history_index.go +++ b/triedb/pathdb/history_index.go @@ -20,7 +20,6 @@ import ( "errors" "fmt" "math" - "sort" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb" @@ -119,30 +118,34 @@ func (r *indexReader) refresh() error { return nil } +// newIterator creates an iterator for traversing the index entries. +func (r *indexReader) newIterator() *indexIterator { + return newIndexIterator(r.descList, func(id uint32) (*blockReader, error) { + br, ok := r.readers[id] + if !ok { + var err error + br, err = newBlockReader(readStateIndexBlock(r.state, r.db, id)) + if err != nil { + return nil, err + } + r.readers[id] = br + } + return br, nil + }) +} + // readGreaterThan locates the first element that is greater than the specified // id. If no such element is found, MaxUint64 is returned. 
func (r *indexReader) readGreaterThan(id uint64) (uint64, error) { - index := sort.Search(len(r.descList), func(i int) bool { - return id < r.descList[i].max - }) - if index == len(r.descList) { + it := r.newIterator() + found := it.SeekGT(id) + if err := it.Error(); err != nil { + return 0, err + } + if !found { return math.MaxUint64, nil } - desc := r.descList[index] - - br, ok := r.readers[desc.id] - if !ok { - var err error - blob := readStateIndexBlock(r.state, r.db, desc.id) - br, err = newBlockReader(blob) - if err != nil { - return 0, err - } - r.readers[desc.id] = br - } - // The supplied ID is not greater than block.max, ensuring that an element - // satisfying the condition can be found. - return br.readGreaterThan(id) + return it.ID(), nil } // indexWriter is responsible for writing index data for a specific state (either diff --git a/triedb/pathdb/history_index_block.go b/triedb/pathdb/history_index_block.go index 5abdee682a..7b59c8e882 100644 --- a/triedb/pathdb/history_index_block.go +++ b/triedb/pathdb/history_index_block.go @@ -21,7 +21,6 @@ import ( "errors" "fmt" "math" - "sort" ) const ( @@ -164,58 +163,15 @@ func newBlockReader(blob []byte) (*blockReader, error) { // readGreaterThan locates the first element in the block that is greater than // the specified value. If no such element is found, MaxUint64 is returned. 
func (br *blockReader) readGreaterThan(id uint64) (uint64, error) { - var err error - index := sort.Search(len(br.restarts), func(i int) bool { - item, n := binary.Uvarint(br.data[br.restarts[i]:]) - if n <= 0 { - err = fmt.Errorf("failed to decode item at restart %d", br.restarts[i]) - } - return item > id - }) - if err != nil { + it := newBlockIterator(br.data, br.restarts) + found := it.SeekGT(id) + if err := it.Error(); err != nil { return 0, err } - if index == 0 { - item, _ := binary.Uvarint(br.data[br.restarts[0]:]) - return item, nil - } - var ( - start int - limit int - result uint64 - ) - if index == len(br.restarts) { - // The element being searched falls within the last restart section, - // there is no guarantee such element can be found. - start = int(br.restarts[len(br.restarts)-1]) - limit = len(br.data) - } else { - // The element being searched falls within the non-last restart section, - // such element can be found for sure. - start = int(br.restarts[index-1]) - limit = int(br.restarts[index]) - } - pos := start - for pos < limit { - x, n := binary.Uvarint(br.data[pos:]) - if pos == start { - result = x - } else { - result += x - } - if result > id { - return result, nil - } - pos += n - } - // The element which is greater than specified id is not found. - if index == len(br.restarts) { + if !found { return math.MaxUint64, nil } - // The element which is the first one greater than the specified id - // is exactly the one located at the restart point. - item, _ := binary.Uvarint(br.data[br.restarts[index]:]) - return item, nil + return it.ID(), nil } type blockWriter struct { diff --git a/triedb/pathdb/history_index_iterator.go b/triedb/pathdb/history_index_iterator.go new file mode 100644 index 0000000000..1ccb39ad09 --- /dev/null +++ b/triedb/pathdb/history_index_iterator.go @@ -0,0 +1,359 @@ +// Copyright 2025 The go-ethereum Authors +// This file is part of the go-ethereum library. 
+// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see id + }) + if err != nil { + it.setErr(err) + return false + } + if index == 0 { + item, n := binary.Uvarint(it.data[it.restarts[0]:]) + + // If the restart size is 1, then the restart pointer shouldn't be 0. + // It's not practical and should be denied in the first place. + it.set(int(it.restarts[0])+n, 0, item) + return true + } + var ( + start int + limit int + restartIndex int // The restart section being searched below + ) + if index == len(it.restarts) { + // The element being searched falls within the last restart section, + // there is no guarantee such element can be found. + start = int(it.restarts[len(it.restarts)-1]) + limit = len(it.data) + restartIndex = len(it.restarts) - 1 + } else { + // The element being searched falls within the non-last restart section, + // such element can be found for sure. 
+ start = int(it.restarts[index-1]) + limit = int(it.restarts[index]) + restartIndex = index - 1 + } + var ( + result uint64 + pos = start + ) + for pos < limit { + x, n := binary.Uvarint(it.data[pos:]) + if n <= 0 { + it.setErr(fmt.Errorf("failed to decode item at pos %d", pos)) + return false + } + if pos == start { + result = x + } else { + result += x + } + pos += n + + if result > id { + if pos == limit { + it.set(pos, restartIndex+1, result) + } else { + it.set(pos, restartIndex, result) + } + return true + } + } + // The element which is greater than specified id is not found. + if index == len(it.restarts) { + it.reset() + return false + } + // The element which is the first one greater than the specified id + // is exactly the one located at the restart point. + item, n := binary.Uvarint(it.data[it.restarts[index]:]) + it.set(int(it.restarts[index])+n, index, item) + return true +} + +func (it *blockIterator) init() { + if it.dataPtr != -1 { + return + } + it.dataPtr = 0 + it.restartPtr = 0 +} + +// Next implements the HistoryIndexIterator, moving the iterator to the next +// element. It returns false once the iterator has been exhausted or an +// error has occurred. 
+func (it *blockIterator) Next() bool { + if it.exhausted || it.err != nil { + return false + } + it.init() + + // Decode the next element pointed by the iterator + v, n := binary.Uvarint(it.data[it.dataPtr:]) + if n <= 0 { + it.setErr(fmt.Errorf("failed to decode item at pos %d", it.dataPtr)) + return false + } + + var val uint64 + if it.dataPtr == int(it.restarts[it.restartPtr]) { + val = v + } else { + val = it.id + v + } + + // Move to the next restart section if the data pointer crosses the boundary + nextRestartPtr := it.restartPtr + if it.restartPtr < len(it.restarts)-1 && it.dataPtr+n == int(it.restarts[it.restartPtr+1]) { + nextRestartPtr = it.restartPtr + 1 + } + it.set(it.dataPtr+n, nextRestartPtr, val) + + return true +} + +// ID implements HistoryIndexIterator, returning the id of the element where the +// iterator is positioned at. +func (it *blockIterator) ID() uint64 { + return it.id +} + +// Error implements HistoryIndexIterator, returning any accumulated error. +// Exhausting all the elements is not considered to be an error. +func (it *blockIterator) Error() error { return it.err } + +// blockLoader defines the method to retrieve the specific block for reading. +type blockLoader func(id uint32) (*blockReader, error) + +// indexIterator is an iterator to traverse the history indices belonging to the +// specific state entry. 
+type indexIterator struct { + // immutable fields + descList []*indexBlockDesc + loader blockLoader + + // mutable fields + blockIt *blockIterator + blockPtr int + exhausted bool + err error +} + +func newIndexIterator(descList []*indexBlockDesc, loader blockLoader) *indexIterator { + it := &indexIterator{ + descList: descList, + loader: loader, + } + it.reset() + return it +} + +func (it *indexIterator) setErr(err error) { + if it.err != nil { + return + } + it.err = err +} + +func (it *indexIterator) reset() { + it.blockIt = nil + it.blockPtr = -1 + it.exhausted = false + it.err = nil + + if len(it.descList) == 0 { + it.exhausted = true + } +} + +func (it *indexIterator) open(blockPtr int) error { + id := it.descList[blockPtr].id + br, err := it.loader(id) + if err != nil { + return err + } + it.blockIt = newBlockIterator(br.data, br.restarts) + it.blockPtr = blockPtr + return nil +} + +// SeekGT moves the iterator to the first element whose id is greater than the +// given number. It returns whether such element exists. +// +// Note, this operation will unset the exhausted status and subsequent traversal +// is allowed. +func (it *indexIterator) SeekGT(id uint64) bool { + if it.err != nil { + return false + } + index := sort.Search(len(it.descList), func(i int) bool { + return id < it.descList[i].max + }) + if index == len(it.descList) { + return false + } + it.exhausted = false + + if it.blockIt == nil || it.blockPtr != index { + if err := it.open(index); err != nil { + it.setErr(err) + return false + } + } + return it.blockIt.SeekGT(id) +} + +func (it *indexIterator) init() error { + if it.blockIt != nil { + return nil + } + return it.open(0) +} + +// Next implements the HistoryIndexIterator, moving the iterator to the next +// element. It returns false once the iterator has been exhausted or an +// error has occurred. 
+func (it *indexIterator) Next() bool { + if it.exhausted || it.err != nil { + return false + } + if err := it.init(); err != nil { + it.setErr(err) + return false + } + + if it.blockIt.Next() { + return true + } + if it.blockPtr == len(it.descList)-1 { + it.exhausted = true + return false + } + if err := it.open(it.blockPtr + 1); err != nil { + it.setErr(err) + return false + } + return it.blockIt.Next() +} + +// Error implements HistoryIndexIterator, returning any accumulated error. +// Exhausting all the elements is not considered to be an error. +func (it *indexIterator) Error() error { + if it.err != nil { + return it.err + } + if it.blockIt != nil { + return it.blockIt.Error() + } + return nil +} + +// ID implements HistoryIndexIterator, returning the id of the element where the +// iterator is positioned at. +func (it *indexIterator) ID() uint64 { + return it.blockIt.ID() +} diff --git a/triedb/pathdb/history_index_iterator_test.go b/triedb/pathdb/history_index_iterator_test.go new file mode 100644 index 0000000000..da60dc6e8f --- /dev/null +++ b/triedb/pathdb/history_index_iterator_test.go @@ -0,0 +1,297 @@ +// Copyright 2025 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. 
If not, see input + }) + var ( + exp bool + expVal uint64 + remains []uint64 + ) + if index == len(elements) { + exp = false + } else { + exp = true + expVal = elements[index] + if index < len(elements) { + remains = elements[index+1:] + } + } + if err := checkSeekGT(it, input, exp, expVal); err != nil { + t.Fatal(err) + } + if exp { + if err := checkNext(it, remains); err != nil { + t.Fatal(err) + } + } + } +} + +func TestIndexIteratorSeekGT(t *testing.T) { + ident := newAccountIdent(common.Hash{0x1}) + + dbA := rawdb.NewMemoryDatabase() + testIndexIterator(t, ident, dbA, makeTestIndexBlocks(dbA, ident, 1)) + + dbB := rawdb.NewMemoryDatabase() + testIndexIterator(t, ident, dbB, makeTestIndexBlocks(dbB, ident, 3*indexBlockEntriesCap)) + + dbC := rawdb.NewMemoryDatabase() + testIndexIterator(t, ident, dbC, makeTestIndexBlocks(dbC, ident, indexBlockEntriesCap-1)) + + dbD := rawdb.NewMemoryDatabase() + testIndexIterator(t, ident, dbD, makeTestIndexBlocks(dbD, ident, indexBlockEntriesCap+1)) +} + +func testIndexIterator(t *testing.T, stateIdent stateIdent, db ethdb.Database, elements []uint64) { + ir, err := newIndexReader(db, stateIdent) + if err != nil { + t.Fatalf("Failed to open the index reader, %v", err) + } + it := newIndexIterator(ir.descList, func(id uint32) (*blockReader, error) { + return newBlockReader(readStateIndexBlock(stateIdent, db, id)) + }) + + for i := 0; i < 128; i++ { + var input uint64 + if rand.Intn(2) == 0 { + input = elements[rand.Intn(len(elements))] + } else { + input = uint64(rand.Uint32()) + } + index := sort.Search(len(elements), func(i int) bool { + return elements[i] > input + }) + var ( + exp bool + expVal uint64 + remains []uint64 + ) + if index == len(elements) { + exp = false + } else { + exp = true + expVal = elements[index] + if index < len(elements) { + remains = elements[index+1:] + } + } + if err := checkSeekGT(it, input, exp, expVal); err != nil { + t.Fatal(err) + } + if exp { + if err := checkNext(it, remains); err != nil { + 
t.Fatal(err) + } + } + } +} + +func TestBlockIteratorTraversal(t *testing.T) { + /* 0-size index block is not allowed + + data, elements := makeTestIndexBlock(0) + testBlockIterator(t, data, elements) + */ + + data, elements := makeTestIndexBlock(1) + testBlockIteratorTraversal(t, data, elements) + + data, elements = makeTestIndexBlock(indexBlockRestartLen) + testBlockIteratorTraversal(t, data, elements) + + data, elements = makeTestIndexBlock(3 * indexBlockRestartLen) + testBlockIteratorTraversal(t, data, elements) + + data, elements = makeTestIndexBlock(indexBlockEntriesCap) + testBlockIteratorTraversal(t, data, elements) +} + +func testBlockIteratorTraversal(t *testing.T, data []byte, elements []uint64) { + br, err := newBlockReader(data) + if err != nil { + t.Fatalf("Failed to open the block for reading, %v", err) + } + it := newBlockIterator(br.data, br.restarts) + + if err := checkNext(it, elements); err != nil { + t.Fatal(err) + } +} + +func TestIndexIteratorTraversal(t *testing.T) { + ident := newAccountIdent(common.Hash{0x1}) + + dbA := rawdb.NewMemoryDatabase() + testIndexIteratorTraversal(t, ident, dbA, makeTestIndexBlocks(dbA, ident, 1)) + + dbB := rawdb.NewMemoryDatabase() + testIndexIteratorTraversal(t, ident, dbB, makeTestIndexBlocks(dbB, ident, 3*indexBlockEntriesCap)) + + dbC := rawdb.NewMemoryDatabase() + testIndexIteratorTraversal(t, ident, dbC, makeTestIndexBlocks(dbC, ident, indexBlockEntriesCap-1)) + + dbD := rawdb.NewMemoryDatabase() + testIndexIteratorTraversal(t, ident, dbD, makeTestIndexBlocks(dbD, ident, indexBlockEntriesCap+1)) +} + +func testIndexIteratorTraversal(t *testing.T, stateIdent stateIdent, db ethdb.KeyValueReader, elements []uint64) { + ir, err := newIndexReader(db, stateIdent) + if err != nil { + t.Fatalf("Failed to open the index reader, %v", err) + } + it := newIndexIterator(ir.descList, func(id uint32) (*blockReader, error) { + return newBlockReader(readStateIndexBlock(stateIdent, db, id)) + }) + if err := checkNext(it, 
elements); err != nil { + t.Fatal(err) + } +}