diff --git a/triedb/pathdb/history_index.go b/triedb/pathdb/history_index.go
index cc5cd204b4..0c5eb8db21 100644
--- a/triedb/pathdb/history_index.go
+++ b/triedb/pathdb/history_index.go
@@ -25,22 +25,28 @@ import (
"github.com/ethereum/go-ethereum/ethdb"
)
-// parseIndex parses the index data with the supplied byte stream. The index data
-// is a list of fixed-sized metadata. Empty metadata is regarded as invalid.
-func parseIndex(blob []byte) ([]*indexBlockDesc, error) {
+// parseIndex parses the index data from the provided byte stream. The index data
+// is a sequence of fixed-size metadata entries, and any empty metadata entry is
+// considered invalid.
+//
+// Each metadata entry consists of two components: the indexBlockDesc and an
+// optional extension bitmap. The bitmap length may vary across different categories,
+// but must remain consistent within the same category.
+func parseIndex(blob []byte, bitmapSize int) ([]*indexBlockDesc, error) {
if len(blob) == 0 {
return nil, errors.New("empty state history index")
}
- if len(blob)%indexBlockDescSize != 0 {
- return nil, fmt.Errorf("corrupted state index, len: %d", len(blob))
+ size := indexBlockDescSize + bitmapSize
+ if len(blob)%size != 0 {
+ return nil, fmt.Errorf("corrupted state index, len: %d, bitmap size: %d", len(blob), bitmapSize)
}
var (
lastID uint32
descList []*indexBlockDesc
)
- for i := 0; i < len(blob)/indexBlockDescSize; i++ {
+ for i := 0; i < len(blob)/size; i++ {
var desc indexBlockDesc
- desc.decode(blob[i*indexBlockDescSize : (i+1)*indexBlockDescSize])
+ desc.decode(blob[i*size : (i+1)*size])
if desc.empty() {
return nil, errors.New("empty state history index block")
}
@@ -69,33 +75,35 @@ func parseIndex(blob []byte) ([]*indexBlockDesc, error) {
// indexReader is the structure to look up the state history index records
// associated with the specific state element.
type indexReader struct {
- db ethdb.KeyValueReader
- descList []*indexBlockDesc
- readers map[uint32]*blockReader
- state stateIdent
+ db ethdb.KeyValueReader
+ descList []*indexBlockDesc
+ readers map[uint32]*blockReader
+ state stateIdent
+ bitmapSize int
}
// loadIndexData loads the index data associated with the specified state.
-func loadIndexData(db ethdb.KeyValueReader, state stateIdent) ([]*indexBlockDesc, error) {
+func loadIndexData(db ethdb.KeyValueReader, state stateIdent, bitmapSize int) ([]*indexBlockDesc, error) {
blob := readStateIndex(state, db)
if len(blob) == 0 {
return nil, nil
}
- return parseIndex(blob)
+ return parseIndex(blob, bitmapSize)
}
// newIndexReader constructs a index reader for the specified state. Reader with
// empty data is allowed.
-func newIndexReader(db ethdb.KeyValueReader, state stateIdent) (*indexReader, error) {
- descList, err := loadIndexData(db, state)
+func newIndexReader(db ethdb.KeyValueReader, state stateIdent, bitmapSize int) (*indexReader, error) {
+ descList, err := loadIndexData(db, state, bitmapSize)
if err != nil {
return nil, err
}
return &indexReader{
- descList: descList,
- readers: make(map[uint32]*blockReader),
- db: db,
- state: state,
+ descList: descList,
+ readers: make(map[uint32]*blockReader),
+ db: db,
+ state: state,
+ bitmapSize: bitmapSize,
}, nil
}
@@ -106,11 +114,9 @@ func (r *indexReader) refresh() error {
// may have been modified by additional elements written to the disk.
if len(r.descList) != 0 {
last := r.descList[len(r.descList)-1]
- if !last.full() {
- delete(r.readers, last.id)
- }
+ delete(r.readers, last.id)
}
- descList, err := loadIndexData(r.db, r.state)
+ descList, err := loadIndexData(r.db, r.state, r.bitmapSize)
if err != nil {
return err
}
@@ -118,26 +124,10 @@ func (r *indexReader) refresh() error {
return nil
}
-// newIterator creates an iterator for traversing the index entries.
-func (r *indexReader) newIterator() *indexIterator {
- return newIndexIterator(r.descList, func(id uint32) (*blockReader, error) {
- br, ok := r.readers[id]
- if !ok {
- var err error
- br, err = newBlockReader(readStateIndexBlock(r.state, r.db, id))
- if err != nil {
- return nil, err
- }
- r.readers[id] = br
- }
- return br, nil
- })
-}
-
// readGreaterThan locates the first element that is greater than the specified
// id. If no such element is found, MaxUint64 is returned.
func (r *indexReader) readGreaterThan(id uint64) (uint64, error) {
- it := r.newIterator()
+ it := r.newIterator(nil)
found := it.SeekGT(id)
if err := it.Error(); err != nil {
return 0, err
@@ -155,31 +145,33 @@ func (r *indexReader) readGreaterThan(id uint64) (uint64, error) {
// history ids) is stored in these second-layer index blocks, which are size
// limited.
type indexWriter struct {
- descList []*indexBlockDesc // The list of index block descriptions
- bw *blockWriter // The live index block writer
- frozen []*blockWriter // The finalized index block writers, waiting for flush
- lastID uint64 // The ID of the latest tracked history
- state stateIdent
- db ethdb.KeyValueReader
+ descList []*indexBlockDesc // The list of index block descriptions
+ bw *blockWriter // The live index block writer
+ frozen []*blockWriter // The finalized index block writers, waiting for flush
+ lastID uint64 // The ID of the latest tracked history
+ state stateIdent // The identifier of the state being indexed
+ bitmapSize int // The size of optional extension bitmap
+ db ethdb.KeyValueReader
}
// newIndexWriter constructs the index writer for the specified state. Additionally,
// it takes an integer as the limit and prunes all existing elements above that ID.
// It's essential as the recovery mechanism after unclean shutdown during the history
// indexing.
-func newIndexWriter(db ethdb.KeyValueReader, state stateIdent, limit uint64) (*indexWriter, error) {
+func newIndexWriter(db ethdb.KeyValueReader, state stateIdent, limit uint64, bitmapSize int) (*indexWriter, error) {
blob := readStateIndex(state, db)
if len(blob) == 0 {
- desc := newIndexBlockDesc(0)
- bw, _ := newBlockWriter(nil, desc, 0 /* useless if the block is empty */)
+ desc := newIndexBlockDesc(0, bitmapSize)
+ bw, _ := newBlockWriter(nil, desc, 0 /* useless if the block is empty */, bitmapSize != 0)
return &indexWriter{
- descList: []*indexBlockDesc{desc},
- bw: bw,
- state: state,
- db: db,
+ descList: []*indexBlockDesc{desc},
+ bw: bw,
+ state: state,
+ db: db,
+ bitmapSize: bitmapSize,
}, nil
}
- descList, err := parseIndex(blob)
+ descList, err := parseIndex(blob, bitmapSize)
if err != nil {
return nil, err
}
@@ -197,30 +189,31 @@ func newIndexWriter(db ethdb.KeyValueReader, state stateIdent, limit uint64) (*i
// Construct the writer for the last block. All elements in this block
// that exceed the limit will be truncated.
- bw, err := newBlockWriter(indexBlock, lastDesc, limit)
+ bw, err := newBlockWriter(indexBlock, lastDesc, limit, bitmapSize != 0)
if err != nil {
return nil, err
}
return &indexWriter{
- descList: descList,
- lastID: bw.last(),
- bw: bw,
- state: state,
- db: db,
+ descList: descList,
+ lastID: bw.last(),
+ bw: bw,
+ state: state,
+ db: db,
+ bitmapSize: bitmapSize,
}, nil
}
// append adds the new element into the index writer.
-func (w *indexWriter) append(id uint64) error {
+func (w *indexWriter) append(id uint64, ext []uint16) error {
if id <= w.lastID {
return fmt.Errorf("append element out of order, last: %d, this: %d", w.lastID, id)
}
- if w.bw.full() {
+ if w.bw.estimateFull(ext) {
if err := w.rotate(); err != nil {
return err
}
}
- if err := w.bw.append(id); err != nil {
+ if err := w.bw.append(id, ext); err != nil {
return err
}
w.lastID = id
@@ -233,10 +226,10 @@ func (w *indexWriter) append(id uint64) error {
func (w *indexWriter) rotate() error {
var (
err error
- desc = newIndexBlockDesc(w.bw.desc.id + 1)
+ desc = newIndexBlockDesc(w.bw.desc.id+1, w.bitmapSize)
)
w.frozen = append(w.frozen, w.bw)
- w.bw, err = newBlockWriter(nil, desc, 0 /* useless if the block is empty */)
+ w.bw, err = newBlockWriter(nil, desc, 0 /* useless if the block is empty */, w.bitmapSize != 0)
if err != nil {
return err
}
@@ -268,7 +261,8 @@ func (w *indexWriter) finish(batch ethdb.Batch) {
}
w.frozen = nil // release all the frozen writers
- buf := make([]byte, 0, indexBlockDescSize*len(descList))
+ size := indexBlockDescSize + w.bitmapSize
+ buf := make([]byte, 0, size*len(descList))
for _, desc := range descList {
buf = append(buf, desc.encode()...)
}
@@ -277,30 +271,32 @@ func (w *indexWriter) finish(batch ethdb.Batch) {
// indexDeleter is responsible for deleting index data for a specific state.
type indexDeleter struct {
- descList []*indexBlockDesc // The list of index block descriptions
- bw *blockWriter // The live index block writer
- dropped []uint32 // The list of index block id waiting for deleting
- lastID uint64 // The ID of the latest tracked history
- state stateIdent
- db ethdb.KeyValueReader
+ descList []*indexBlockDesc // The list of index block descriptions
+ bw *blockWriter // The live index block writer
+ dropped []uint32 // The list of index block id waiting for deleting
+ lastID uint64 // The ID of the latest tracked history
+ state stateIdent // The identifier of the state being indexed
+ bitmapSize int // The size of optional extension bitmap
+ db ethdb.KeyValueReader
}
// newIndexDeleter constructs the index deleter for the specified state.
-func newIndexDeleter(db ethdb.KeyValueReader, state stateIdent, limit uint64) (*indexDeleter, error) {
+func newIndexDeleter(db ethdb.KeyValueReader, state stateIdent, limit uint64, bitmapSize int) (*indexDeleter, error) {
blob := readStateIndex(state, db)
if len(blob) == 0 {
// TODO(rjl493456442) we can probably return an error here,
// deleter with no data is meaningless.
- desc := newIndexBlockDesc(0)
- bw, _ := newBlockWriter(nil, desc, 0 /* useless if the block is empty */)
+ desc := newIndexBlockDesc(0, bitmapSize)
+ bw, _ := newBlockWriter(nil, desc, 0 /* useless if the block is empty */, bitmapSize != 0)
return &indexDeleter{
- descList: []*indexBlockDesc{desc},
- bw: bw,
- state: state,
- db: db,
+ descList: []*indexBlockDesc{desc},
+ bw: bw,
+ state: state,
+ bitmapSize: bitmapSize,
+ db: db,
}, nil
}
- descList, err := parseIndex(blob)
+ descList, err := parseIndex(blob, bitmapSize)
if err != nil {
return nil, err
}
@@ -318,16 +314,17 @@ func newIndexDeleter(db ethdb.KeyValueReader, state stateIdent, limit uint64) (*
// Construct the writer for the last block. All elements in this block
// that exceed the limit will be truncated.
- bw, err := newBlockWriter(indexBlock, lastDesc, limit)
+ bw, err := newBlockWriter(indexBlock, lastDesc, limit, bitmapSize != 0)
if err != nil {
return nil, err
}
return &indexDeleter{
- descList: descList,
- lastID: bw.last(),
- bw: bw,
- state: state,
- db: db,
+ descList: descList,
+ lastID: bw.last(),
+ bw: bw,
+ state: state,
+ bitmapSize: bitmapSize,
+ db: db,
}, nil
}
@@ -364,7 +361,7 @@ func (d *indexDeleter) pop(id uint64) error {
// Open the previous block writer for deleting
lastDesc := d.descList[len(d.descList)-1]
indexBlock := readStateIndexBlock(d.state, d.db, lastDesc.id)
- bw, err := newBlockWriter(indexBlock, lastDesc, lastDesc.max)
+ bw, err := newBlockWriter(indexBlock, lastDesc, lastDesc.max, d.bitmapSize != 0)
if err != nil {
return err
}
@@ -390,7 +387,8 @@ func (d *indexDeleter) finish(batch ethdb.Batch) {
if d.empty() {
deleteStateIndex(d.state, batch)
} else {
- buf := make([]byte, 0, indexBlockDescSize*len(d.descList))
+ size := indexBlockDescSize + d.bitmapSize
+ buf := make([]byte, 0, size*len(d.descList))
for _, desc := range d.descList {
buf = append(buf, desc.encode()...)
}
diff --git a/triedb/pathdb/history_index_block.go b/triedb/pathdb/history_index_block.go
index 13f16b4cf3..fd43d81b78 100644
--- a/triedb/pathdb/history_index_block.go
+++ b/triedb/pathdb/history_index_block.go
@@ -17,6 +17,7 @@
package pathdb
import (
+ "bytes"
"encoding/binary"
"errors"
"fmt"
@@ -26,23 +27,27 @@ import (
)
const (
- indexBlockDescSize = 14 // The size of index block descriptor
- indexBlockEntriesCap = 4096 // The maximum number of entries can be grouped in a block
- indexBlockRestartLen = 256 // The restart interval length of index block
- historyIndexBatch = 8 * 1024 * 1024 // The number of state history indexes for constructing or deleting as batch
+ indexBlockDescSize = 14 // The size of index block descriptor
+ indexBlockMaxSize = 4096 // The maximum size of a single index block
+ indexBlockRestartLen = 256 // The restart interval length of index block
)
// indexBlockDesc represents a descriptor for an index block, which contains a
// list of state mutation records associated with a specific state (either an
// account or a storage slot).
type indexBlockDesc struct {
- max uint64 // The maximum state ID retained within the block
- entries uint16 // The number of state mutation records retained within the block
- id uint32 // The id of the index block
+ max uint64 // The maximum state ID retained within the block
+ entries uint16 // The number of state mutation records retained within the block
+ id uint32 // The id of the index block
+ extBitmap []byte // Optional fixed-size bitmap for the included extension elements
}
-func newIndexBlockDesc(id uint32) *indexBlockDesc {
- return &indexBlockDesc{id: id}
+func newIndexBlockDesc(id uint32, bitmapSize int) *indexBlockDesc {
+ var bitmap []byte
+ if bitmapSize > 0 {
+ bitmap = make([]byte, bitmapSize)
+ }
+ return &indexBlockDesc{id: id, extBitmap: bitmap}
}
// empty indicates whether the block is empty with no element retained.
@@ -50,26 +55,33 @@ func (d *indexBlockDesc) empty() bool {
return d.entries == 0
}
-// full indicates whether the number of elements in the block exceeds the
-// preconfigured limit.
-func (d *indexBlockDesc) full() bool {
- return d.entries >= indexBlockEntriesCap
-}
-
// encode packs index block descriptor into byte stream.
func (d *indexBlockDesc) encode() []byte {
- var buf [indexBlockDescSize]byte
+ buf := make([]byte, indexBlockDescSize+len(d.extBitmap))
binary.BigEndian.PutUint64(buf[0:8], d.max)
binary.BigEndian.PutUint16(buf[8:10], d.entries)
binary.BigEndian.PutUint32(buf[10:14], d.id)
+ copy(buf[indexBlockDescSize:], d.extBitmap)
return buf[:]
}
-// decode unpacks index block descriptor from byte stream.
+// decode unpacks index block descriptor from byte stream. It's safe to mutate
+// the provided byte stream after the function call.
func (d *indexBlockDesc) decode(blob []byte) {
d.max = binary.BigEndian.Uint64(blob[:8])
d.entries = binary.BigEndian.Uint16(blob[8:10])
d.id = binary.BigEndian.Uint32(blob[10:14])
+ d.extBitmap = bytes.Clone(blob[indexBlockDescSize:])
+}
+
+// copy returns a deep-copied object.
+func (d *indexBlockDesc) copy() *indexBlockDesc {
+ return &indexBlockDesc{
+ max: d.max,
+ entries: d.entries,
+ id: d.id,
+ extBitmap: bytes.Clone(d.extBitmap),
+ }
}
// parseIndexBlock parses the index block with the supplied byte stream.
@@ -97,20 +109,38 @@ func (d *indexBlockDesc) decode(blob []byte) {
// A uint16 can cover offsets in the range [0, 65536), which is more than enough
// to store 4096 integers.
//
-// Each chunk begins with the full value of the first integer, followed by
-// subsequent integers representing the differences between the current value
-// and the preceding one. Integers are encoded with variable-size for best
-// storage efficiency. Each chunk can be illustrated as below.
+// Each chunk begins with a full integer value for the first element, followed
+// by subsequent integers encoded as differences (deltas) from their preceding
+// values. All integers use variable-length encoding for optimal space efficiency.
//
-// Restart ---> +----------------+
-// | Full integer |
-// +----------------+
-// | Diff with prev |
-// +----------------+
-// | ... |
-// +----------------+
-// | Diff with prev |
-// +----------------+
+// In the updated format, each element in the chunk may optionally include an
+// "extension" section. If an extension is present, it starts with a var-size
+// integer indicating the length of the remaining extension payload, followed by
+// that many bytes. If no extension is present, the element format is identical
+// to the original version (i.e., only the integer or delta value is encoded).
+//
+// In the trienode history index, the extension field contains the list of
+// trie node IDs that fall within this range. For the given state transition,
+// these IDs represent the specific nodes in this range that were mutated.
+//
+// Whether an element includes an extension is determined by the block reader
+// based on the specification. Conceptually, a chunk is structured as:
+//
+// Restart ---> +----------------+
+// | Full integer |
+// +----------------+
+// | (Extension?) |
+// +----------------+
+// | Diff with prev |
+// +----------------+
+// | (Extension?) |
+// +----------------+
+// | ... |
+// +----------------+
+// | Diff with prev |
+// +----------------+
+// | (Extension?) |
+// +----------------+
//
// Empty index block is regarded as invalid.
func parseIndexBlock(blob []byte) ([]uint16, []byte, error) {
@@ -148,24 +178,26 @@ func parseIndexBlock(blob []byte) ([]uint16, []byte, error) {
type blockReader struct {
restarts []uint16
data []byte
+ hasExt bool
}
// newBlockReader constructs the block reader with the supplied block data.
-func newBlockReader(blob []byte) (*blockReader, error) {
+func newBlockReader(blob []byte, hasExt bool) (*blockReader, error) {
restarts, data, err := parseIndexBlock(blob)
if err != nil {
return nil, err
}
return &blockReader{
restarts: restarts,
- data: data, // safe to own the slice
+ data: data, // safe to own the slice
+ hasExt: hasExt, // flag whether extension should be resolved
}, nil
}
// readGreaterThan locates the first element in the block that is greater than
// the specified value. If no such element is found, MaxUint64 is returned.
func (br *blockReader) readGreaterThan(id uint64) (uint64, error) {
- it := newBlockIterator(br.data, br.restarts)
+ it := br.newIterator(nil)
found := it.SeekGT(id)
if err := it.Error(); err != nil {
return 0, err
@@ -180,17 +212,19 @@ type blockWriter struct {
desc *indexBlockDesc // Descriptor of the block
restarts []uint16 // Offsets into the data slice, marking the start of each section
data []byte // Aggregated encoded data slice
+ hasExt bool // Flag whether the extension field for each element exists
}
// newBlockWriter constructs a block writer. In addition to the existing data
// and block description, it takes an element ID and prunes all existing elements
// above that ID. It's essential as the recovery mechanism after unclean shutdown
// during the history indexing.
-func newBlockWriter(blob []byte, desc *indexBlockDesc, limit uint64) (*blockWriter, error) {
+func newBlockWriter(blob []byte, desc *indexBlockDesc, limit uint64, hasExt bool) (*blockWriter, error) {
if len(blob) == 0 {
return &blockWriter{
- desc: desc,
- data: make([]byte, 0, 1024),
+ desc: desc,
+ data: make([]byte, 0, 1024),
+ hasExt: hasExt,
}, nil
}
restarts, data, err := parseIndexBlock(blob)
@@ -201,6 +235,7 @@ func newBlockWriter(blob []byte, desc *indexBlockDesc, limit uint64) (*blockWrit
desc: desc,
restarts: restarts,
data: data, // safe to own the slice
+ hasExt: hasExt,
}
var trimmed int
for !writer.empty() && writer.last() > limit {
@@ -215,9 +250,26 @@ func newBlockWriter(blob []byte, desc *indexBlockDesc, limit uint64) (*blockWrit
return writer, nil
}
+// setBitmap applies the given extension elements into the bitmap.
+func (b *blockWriter) setBitmap(ext []uint16) {
+ for _, n := range ext {
+ // Node ID zero is intentionally filtered out. Any element in this range
+ // can indicate that the sub-tree's root node was mutated, so storing zero
+ // is redundant; omitting it saves one byte in the bitmap.
+ if n != 0 {
+ setBit(b.desc.extBitmap, int(n-1))
+ }
+ }
+}
+
// append adds a new element to the block. The new element must be greater than
// the previous one. The provided ID is assumed to always be greater than 0.
-func (b *blockWriter) append(id uint64) error {
+//
+// ext refers to the optional extension field attached to the appended element.
+// This extension mechanism is used by trie-node history and represents a list of
+// trie node IDs that fall within the range covered by the index element
+// (typically corresponding to a sub-trie in trie-node history).
+func (b *blockWriter) append(id uint64, ext []uint16) error {
if id == 0 {
return errors.New("invalid zero id")
}
@@ -244,13 +296,29 @@ func (b *blockWriter) append(id uint64) error {
// element.
b.data = binary.AppendUvarint(b.data, id-b.desc.max)
}
+ // Validate that the presence of the extension matches the writer's hasExt expectation
+ if (len(ext) == 0) != !b.hasExt {
+ if len(ext) == 0 {
+ return errors.New("missing extension")
+ }
+ return errors.New("unexpected extension")
+ }
+ // Append the extension if it is not empty. The extension is prefixed with a
+ // length indicator, and the block reader MUST understand this scheme and
+ // decode the extension accordingly.
+ if len(ext) > 0 {
+ b.setBitmap(ext)
+ enc := encodeIDs(ext)
+ b.data = binary.AppendUvarint(b.data, uint64(len(enc)))
+ b.data = append(b.data, enc...)
+ }
b.desc.entries++
b.desc.max = id
return nil
}
// scanSection traverses the specified section and terminates if fn returns true.
-func (b *blockWriter) scanSection(section int, fn func(uint64, int) bool) {
+func (b *blockWriter) scanSection(section int, fn func(uint64, int, []uint16) bool) error {
var (
value uint64
start = int(b.restarts[section])
@@ -269,28 +337,47 @@ func (b *blockWriter) scanSection(section int, fn func(uint64, int) bool) {
} else {
value += x
}
- if fn(value, pos) {
- return
+ // Resolve the extension if exists
+ var (
+ err error
+ ext []uint16
+ extLen int
+ )
+ if b.hasExt {
+ l, ln := binary.Uvarint(b.data[pos+n:])
+ extLen = ln + int(l)
+ ext, err = decodeIDs(b.data[pos+n+ln : pos+n+extLen])
}
+ if err != nil {
+ return err
+ }
+ if fn(value, pos, ext) {
+ return nil
+ }
+ // Shift to next position
pos += n
+ pos += extLen
}
+ return nil
}
// sectionLast returns the last element in the specified section.
-func (b *blockWriter) sectionLast(section int) uint64 {
+func (b *blockWriter) sectionLast(section int) (uint64, error) {
var n uint64
- b.scanSection(section, func(v uint64, _ int) bool {
+ if err := b.scanSection(section, func(v uint64, _ int, _ []uint16) bool {
n = v
return false
- })
- return n
+ }); err != nil {
+ return 0, err
+ }
+ return n, nil
}
// sectionSearch looks up the specified value in the given section,
// the position and the preceding value will be returned if found.
// It assumes that the preceding element exists in the section.
-func (b *blockWriter) sectionSearch(section int, n uint64) (found bool, prev uint64, pos int) {
- b.scanSection(section, func(v uint64, p int) bool {
+func (b *blockWriter) sectionSearch(section int, n uint64) (found bool, prev uint64, pos int, err error) {
+ if err := b.scanSection(section, func(v uint64, p int, _ []uint16) bool {
if n == v {
pos = p
found = true
@@ -298,8 +385,24 @@ func (b *blockWriter) sectionSearch(section int, n uint64) (found bool, prev uin
}
prev = v
return false // continue iteration
- })
- return found, prev, pos
+ }); err != nil {
+ return false, 0, 0, err
+ }
+ return found, prev, pos, nil
+}
+
+// rebuildBitmap scans the entire block and rebuilds the bitmap.
+func (b *blockWriter) rebuildBitmap() error {
+ clear(b.desc.extBitmap)
+ for i := 0; i < len(b.restarts); i++ {
+ if err := b.scanSection(i, func(v uint64, p int, ext []uint16) bool {
+ b.setBitmap(ext)
+ return false // continue iteration
+ }); err != nil {
+ return err
+ }
+ }
+ return nil
}
// pop removes the last element from the block. The assumption is held that block
@@ -315,6 +418,7 @@ func (b *blockWriter) pop(id uint64) error {
if b.desc.entries == 1 {
b.desc.max = 0
b.desc.entries = 0
+ clear(b.desc.extBitmap)
b.restarts = nil
b.data = b.data[:0]
return nil
@@ -324,28 +428,36 @@ func (b *blockWriter) pop(id uint64) error {
if b.desc.entries%indexBlockRestartLen == 1 {
b.data = b.data[:b.restarts[len(b.restarts)-1]]
b.restarts = b.restarts[:len(b.restarts)-1]
- b.desc.max = b.sectionLast(len(b.restarts) - 1)
+ last, err := b.sectionLast(len(b.restarts) - 1)
+ if err != nil {
+ return err
+ }
+ b.desc.max = last
b.desc.entries -= 1
- return nil
+ return b.rebuildBitmap()
}
// Look up the element preceding the one to be popped, in order to update
// the maximum element in the block.
- found, prev, pos := b.sectionSearch(len(b.restarts)-1, id)
+ found, prev, pos, err := b.sectionSearch(len(b.restarts)-1, id)
+ if err != nil {
+ return err
+ }
if !found {
return fmt.Errorf("pop element is not found, last: %d, this: %d", b.desc.max, id)
}
b.desc.max = prev
b.data = b.data[:pos]
b.desc.entries -= 1
- return nil
+ return b.rebuildBitmap()
}
func (b *blockWriter) empty() bool {
return b.desc.empty()
}
-func (b *blockWriter) full() bool {
- return b.desc.full()
+func (b *blockWriter) estimateFull(ext []uint16) bool {
+ size := 8 + 2*len(ext)
+ return len(b.data)+size > indexBlockMaxSize
}
// last returns the last element in the block. It should only be called when
diff --git a/triedb/pathdb/history_index_block_test.go b/triedb/pathdb/history_index_block_test.go
index f8c6d3ab87..923ae29348 100644
--- a/triedb/pathdb/history_index_block_test.go
+++ b/triedb/pathdb/history_index_block_test.go
@@ -17,6 +17,7 @@
package pathdb
import (
+ "bytes"
"math"
"math/rand"
"slices"
@@ -24,16 +25,36 @@ import (
"testing"
)
+func randomExt(bitmapSize int, n int) []uint16 {
+ if bitmapSize == 0 {
+ return nil
+ }
+ var (
+ limit = bitmapSize * 8
+ extList []uint16
+ )
+ for i := 0; i < n; i++ {
+ extList = append(extList, uint16(rand.Intn(limit+1)))
+ }
+ return extList
+}
+
func TestBlockReaderBasic(t *testing.T) {
+ testBlockReaderBasic(t, 0)
+ testBlockReaderBasic(t, 2)
+ testBlockReaderBasic(t, 34)
+}
+
+func testBlockReaderBasic(t *testing.T, bitmapSize int) {
elements := []uint64{
1, 5, 10, 11, 20,
}
- bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0)
+ bw, _ := newBlockWriter(nil, newIndexBlockDesc(0, bitmapSize), 0, bitmapSize != 0)
for i := 0; i < len(elements); i++ {
- bw.append(elements[i])
+ bw.append(elements[i], randomExt(bitmapSize, 5))
}
- br, err := newBlockReader(bw.finish())
+ br, err := newBlockReader(bw.finish(), bitmapSize != 0)
if err != nil {
t.Fatalf("Failed to construct the block reader, %v", err)
}
@@ -60,18 +81,24 @@ func TestBlockReaderBasic(t *testing.T) {
}
func TestBlockReaderLarge(t *testing.T) {
+ testBlockReaderLarge(t, 0)
+ testBlockReaderLarge(t, 2)
+ testBlockReaderLarge(t, 34)
+}
+
+func testBlockReaderLarge(t *testing.T, bitmapSize int) {
var elements []uint64
for i := 0; i < 1000; i++ {
elements = append(elements, rand.Uint64())
}
slices.Sort(elements)
- bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0)
+ bw, _ := newBlockWriter(nil, newIndexBlockDesc(0, bitmapSize), 0, bitmapSize != 0)
for i := 0; i < len(elements); i++ {
- bw.append(elements[i])
+ bw.append(elements[i], randomExt(bitmapSize, 5))
}
- br, err := newBlockReader(bw.finish())
+ br, err := newBlockReader(bw.finish(), bitmapSize != 0)
if err != nil {
t.Fatalf("Failed to construct the block reader, %v", err)
}
@@ -95,26 +122,32 @@ func TestBlockReaderLarge(t *testing.T) {
}
func TestBlockWriterBasic(t *testing.T) {
- bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0)
+ testBlockWriteBasic(t, 0)
+ testBlockWriteBasic(t, 2)
+ testBlockWriteBasic(t, 34)
+}
+
+func testBlockWriteBasic(t *testing.T, bitmapSize int) {
+ bw, _ := newBlockWriter(nil, newIndexBlockDesc(0, bitmapSize), 0, bitmapSize != 0)
if !bw.empty() {
t.Fatal("expected empty block")
}
- bw.append(2)
- if err := bw.append(1); err == nil {
+ bw.append(2, randomExt(bitmapSize, 5))
+ if err := bw.append(1, randomExt(bitmapSize, 5)); err == nil {
t.Fatal("out-of-order insertion is not expected")
}
var maxElem uint64
for i := 0; i < 10; i++ {
- bw.append(uint64(i + 3))
+ bw.append(uint64(i+3), randomExt(bitmapSize, 5))
maxElem = uint64(i + 3)
}
- bw, err := newBlockWriter(bw.finish(), newIndexBlockDesc(0), maxElem)
+ bw, err := newBlockWriter(bw.finish(), newIndexBlockDesc(0, bitmapSize), maxElem, bitmapSize != 0)
if err != nil {
t.Fatalf("Failed to construct the block writer, %v", err)
}
for i := 0; i < 10; i++ {
- if err := bw.append(uint64(i + 100)); err != nil {
+ if err := bw.append(uint64(i+100), randomExt(bitmapSize, 5)); err != nil {
t.Fatalf("Failed to append value %d: %v", i, err)
}
}
@@ -122,58 +155,38 @@ func TestBlockWriterBasic(t *testing.T) {
}
func TestBlockWriterWithLimit(t *testing.T) {
- bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0)
+ testBlockWriterWithLimit(t, 0)
+ testBlockWriterWithLimit(t, 2)
+ testBlockWriterWithLimit(t, 34)
+}
- var maxElem uint64
- for i := 0; i < indexBlockRestartLen*2; i++ {
- bw.append(uint64(i + 1))
- maxElem = uint64(i + 1)
- }
+func testBlockWriterWithLimit(t *testing.T, bitmapSize int) {
+ bw, _ := newBlockWriter(nil, newIndexBlockDesc(0, bitmapSize), 0, bitmapSize != 0)
- suites := []struct {
- limit uint64
- expMax uint64
- }{
- // nothing to truncate
- {
- maxElem, maxElem,
- },
- // truncate the last element
- {
- maxElem - 1, maxElem - 1,
- },
- // truncation around the restart boundary
- {
- uint64(indexBlockRestartLen + 1),
- uint64(indexBlockRestartLen + 1),
- },
- // truncation around the restart boundary
- {
- uint64(indexBlockRestartLen),
- uint64(indexBlockRestartLen),
- },
- {
- uint64(1), uint64(1),
- },
- // truncate the entire block, it's in theory invalid
- {
- uint64(0), uint64(0),
- },
+ var bitmaps [][]byte
+ for i := 0; i < indexBlockRestartLen+2; i++ {
+ bw.append(uint64(i+1), randomExt(bitmapSize, 5))
+ bitmaps = append(bitmaps, bytes.Clone(bw.desc.extBitmap))
}
- for i, suite := range suites {
- desc := *bw.desc
- block, err := newBlockWriter(bw.finish(), &desc, suite.limit)
+ for i := 0; i < indexBlockRestartLen+2; i++ {
+ limit := uint64(i + 1)
+
+ desc := bw.desc.copy()
+ block, err := newBlockWriter(bytes.Clone(bw.finish()), desc, limit, bitmapSize != 0)
if err != nil {
t.Fatalf("Failed to construct the block writer, %v", err)
}
- if block.desc.max != suite.expMax {
- t.Fatalf("Test %d, unexpected max value, got %d, want %d", i, block.desc.max, suite.expMax)
+ if block.desc.max != limit {
+ t.Fatalf("Test %d, unexpected max value, got %d, want %d", i, block.desc.max, limit)
+ }
+ if !bytes.Equal(desc.extBitmap, bitmaps[i]) {
+ t.Fatalf("Test %d, unexpected bitmap, got: %v, want: %v", i, block.desc.extBitmap, bitmaps[i])
}
// Re-fill the elements
var maxElem uint64
- for elem := suite.limit + 1; elem < indexBlockRestartLen*4; elem++ {
- if err := block.append(elem); err != nil {
+ for elem := limit + 1; elem < indexBlockRestartLen+4; elem++ {
+ if err := block.append(elem, randomExt(bitmapSize, 5)); err != nil {
t.Fatalf("Failed to append value %d: %v", elem, err)
}
maxElem = elem
@@ -185,9 +198,15 @@ func TestBlockWriterWithLimit(t *testing.T) {
}
func TestBlockWriterDelete(t *testing.T) {
- bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0)
+ testBlockWriterDelete(t, 0)
+ testBlockWriterDelete(t, 2)
+ testBlockWriterDelete(t, 34)
+}
+
+func testBlockWriterDelete(t *testing.T, bitmapSize int) {
+ bw, _ := newBlockWriter(nil, newIndexBlockDesc(0, bitmapSize), 0, bitmapSize != 0)
for i := 0; i < 10; i++ {
- bw.append(uint64(i + 1))
+ bw.append(uint64(i+1), randomExt(bitmapSize, 5))
}
// Pop unknown id, the request should be rejected
if err := bw.pop(100); err == nil {
@@ -209,12 +228,18 @@ func TestBlockWriterDelete(t *testing.T) {
}
func TestBlcokWriterDeleteWithData(t *testing.T) {
+ testBlcokWriterDeleteWithData(t, 0)
+ testBlcokWriterDeleteWithData(t, 2)
+ testBlcokWriterDeleteWithData(t, 34)
+}
+
+func testBlcokWriterDeleteWithData(t *testing.T, bitmapSize int) {
elements := []uint64{
1, 5, 10, 11, 20,
}
- bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0)
+ bw, _ := newBlockWriter(nil, newIndexBlockDesc(0, bitmapSize), 0, bitmapSize != 0)
for i := 0; i < len(elements); i++ {
- bw.append(elements[i])
+ bw.append(elements[i], randomExt(bitmapSize, 5))
}
// Re-construct the block writer with data
@@ -223,7 +248,10 @@ func TestBlcokWriterDeleteWithData(t *testing.T) {
max: 20,
entries: 5,
}
- bw, err := newBlockWriter(bw.finish(), desc, elements[len(elements)-1])
+ if bitmapSize > 0 {
+ desc.extBitmap = make([]byte, bitmapSize)
+ }
+ bw, err := newBlockWriter(bw.finish(), desc, elements[len(elements)-1], bitmapSize != 0)
if err != nil {
t.Fatalf("Failed to construct block writer %v", err)
}
@@ -234,7 +262,7 @@ func TestBlcokWriterDeleteWithData(t *testing.T) {
newTail := elements[i-1]
// Ensure the element can still be queried with no issue
- br, err := newBlockReader(bw.finish())
+ br, err := newBlockReader(bw.finish(), bitmapSize != 0)
if err != nil {
t.Fatalf("Failed to construct the block reader, %v", err)
}
@@ -266,29 +294,60 @@ func TestBlcokWriterDeleteWithData(t *testing.T) {
}
func TestCorruptedIndexBlock(t *testing.T) {
- bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0)
+ bw, _ := newBlockWriter(nil, newIndexBlockDesc(0, 0), 0, false)
var maxElem uint64
for i := 0; i < 10; i++ {
- bw.append(uint64(i + 1))
+ bw.append(uint64(i+1), nil)
maxElem = uint64(i + 1)
}
buf := bw.finish()
// Mutate the buffer manually
buf[len(buf)-1]++
- _, err := newBlockWriter(buf, newIndexBlockDesc(0), maxElem)
+ _, err := newBlockWriter(buf, newIndexBlockDesc(0, 0), maxElem, false)
if err == nil {
t.Fatal("Corrupted index block data is not detected")
}
}
// BenchmarkParseIndexBlock benchmarks the performance of parseIndexBlock.
+//
+// goos: darwin
+// goarch: arm64
+// pkg: github.com/ethereum/go-ethereum/triedb/pathdb
+// cpu: Apple M1 Pro
+// BenchmarkParseIndexBlock
+// BenchmarkParseIndexBlock-8 35829495 34.16 ns/op
func BenchmarkParseIndexBlock(b *testing.B) {
// Generate a realistic index block blob
- bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0)
+ bw, _ := newBlockWriter(nil, newIndexBlockDesc(0, 0), 0, false)
for i := 0; i < 4096; i++ {
- bw.append(uint64(i * 2))
+ bw.append(uint64(i*2), nil)
+ }
+ blob := bw.finish()
+
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _, _, err := parseIndexBlock(blob)
+ if err != nil {
+ b.Fatalf("parseIndexBlock failed: %v", err)
+ }
+ }
+}
+
+// goos: darwin
+// goarch: arm64
+// pkg: github.com/ethereum/go-ethereum/triedb/pathdb
+// cpu: Apple M1 Pro
+// BenchmarkParseIndexBlockWithExt
+// BenchmarkParseIndexBlockWithExt-8 35773242 33.72 ns/op
+func BenchmarkParseIndexBlockWithExt(b *testing.B) {
+ // Generate a realistic index block blob
+ bw, _ := newBlockWriter(nil, newIndexBlockDesc(0, 34), 0, true)
+ for i := 0; i < 4096; i++ {
+ id, ext := uint64(i*2), randomExt(34, 3)
+ bw.append(id, ext)
}
blob := bw.finish()
@@ -302,21 +361,58 @@ func BenchmarkParseIndexBlock(b *testing.B) {
}
// BenchmarkBlockWriterAppend benchmarks the performance of indexblock.writer
+//
+// goos: darwin
+// goarch: arm64
+// pkg: github.com/ethereum/go-ethereum/triedb/pathdb
+// cpu: Apple M1 Pro
+// BenchmarkBlockWriterAppend
+// BenchmarkBlockWriterAppend-8 293611083 4.113 ns/op 3 B/op 0 allocs/op
func BenchmarkBlockWriterAppend(b *testing.B) {
b.ReportAllocs()
b.ResetTimer()
var blockID uint32
- desc := newIndexBlockDesc(blockID)
- writer, _ := newBlockWriter(nil, desc, 0)
+ desc := newIndexBlockDesc(blockID, 0)
+ writer, _ := newBlockWriter(nil, desc, 0, false)
for i := 0; i < b.N; i++ {
- if writer.full() {
+ if writer.estimateFull(nil) {
blockID += 1
- desc = newIndexBlockDesc(blockID)
- writer, _ = newBlockWriter(nil, desc, 0)
+ desc = newIndexBlockDesc(blockID, 0)
+ writer, _ = newBlockWriter(nil, desc, 0, false)
}
- if err := writer.append(writer.desc.max + 1); err != nil {
+ if err := writer.append(writer.desc.max+1, nil); err != nil {
+ b.Error(err)
+ }
+ }
+}
+
+// goos: darwin
+// goarch: arm64
+// pkg: github.com/ethereum/go-ethereum/triedb/pathdb
+// cpu: Apple M1 Pro
+// BenchmarkBlockWriterAppendWithExt
+// BenchmarkBlockWriterAppendWithExt-8 11123844 103.6 ns/op 42 B/op 2 allocs/op
+func BenchmarkBlockWriterAppendWithExt(b *testing.B) {
+ b.ReportAllocs()
+ b.ResetTimer()
+
+ var (
+ bitmapSize = 34
+ blockID uint32
+ )
+ desc := newIndexBlockDesc(blockID, bitmapSize)
+ writer, _ := newBlockWriter(nil, desc, 0, true)
+
+ for i := 0; i < b.N; i++ {
+ ext := randomExt(bitmapSize, 3)
+ if writer.estimateFull(ext) {
+ blockID += 1
+ desc = newIndexBlockDesc(blockID, bitmapSize)
+ writer, _ = newBlockWriter(nil, desc, 0, true)
+ }
+ if err := writer.append(writer.desc.max+1, ext); err != nil {
b.Error(err)
}
}
diff --git a/triedb/pathdb/history_index_iterator.go b/triedb/pathdb/history_index_iterator.go
index 1ccb39ad09..076baaa9e5 100644
--- a/triedb/pathdb/history_index_iterator.go
+++ b/triedb/pathdb/history_index_iterator.go
@@ -40,31 +40,133 @@ type HistoryIndexIterator interface {
Error() error
}
+// extFilter provides utilities for filtering index entries based on their
+// extension field.
+//
+// It supports two primary operations:
+//
+// - determine whether a given target node ID or any of its descendants
+// appears explicitly in the extension list.
+//
+// - determine whether a given target node ID or any of its descendants
+// is marked in the extension bitmap.
+//
+// Together, these checks allow callers to efficiently filter out the irrelevant
+// index entries during the lookup.
+type extFilter uint16
+
+// exists takes the entire extension field in the index block and determines
+// whether the target ID or any of its descendants appears. Note that the
+// presence of any descendant implicitly implies the presence of its ancestor.
+func (f extFilter) exists(ext []byte) (bool, error) {
+ fn := uint16(f)
+ list, err := decodeIDs(ext)
+ if err != nil {
+ return false, err
+ }
+ for _, elem := range list {
+ if elem == fn {
+ return true, nil
+ }
+ if isAncestor(fn, elem) {
+ return true, nil
+ }
+ }
+ return false, nil
+}
+
+const (
+ // bitmapBytesTwoLevels is the size of the bitmap for two levels of the
+ // 16-ary tree (16 nodes total, excluding the root).
+ bitmapBytesTwoLevels = 2
+
+ // bitmapBytesThreeLevels is the size of the bitmap for three levels of
+ // the 16-ary tree (272 nodes total, excluding the root).
+ bitmapBytesThreeLevels = 34
+
+ // bitmapElementThresholdTwoLevels is the total number of elements in the
+ // two levels of a 16-ary tree (16 nodes total, excluding the root).
+ bitmapElementThresholdTwoLevels = 16
+
+ // bitmapElementThresholdThreeLevels is the total number of elements in the
+ // three levels of a 16-ary tree (272 nodes total, excluding the root).
+ bitmapElementThresholdThreeLevels = bitmapElementThresholdTwoLevels + 16*16
+)
+
+// contains takes the bitmap from the block metadata and determines whether the
+// target ID or any of its descendants is marked in the bitmap. Note that the
+// presence of any descendant implicitly implies the presence of its ancestor.
+func (f extFilter) contains(bitmap []byte) (bool, error) {
+ id := int(f)
+ if id == 0 {
+ return true, nil
+ }
+ n := id - 1 // apply the position shift for excluding root node
+
+ switch len(bitmap) {
+ case 0:
+ // Bitmap is not available, return "false positive"
+ return true, nil
+ case bitmapBytesTwoLevels:
+ // Bitmap for 2-level trie with at most 16 elements inside
+ if n >= bitmapElementThresholdTwoLevels {
+ return false, fmt.Errorf("invalid extension filter %d for 2 bytes bitmap", id)
+ }
+ return isBitSet(bitmap, n), nil
+ case bitmapBytesThreeLevels:
+ // Bitmap for 3-level trie with at most 16+16*16 elements inside
+ if n >= bitmapElementThresholdThreeLevels {
+ return false, fmt.Errorf("invalid extension filter %d for 34 bytes bitmap", id)
+ } else if n >= bitmapElementThresholdTwoLevels {
+ return isBitSet(bitmap, n), nil
+ } else {
+ // Check the element itself first
+ if isBitSet(bitmap, n) {
+ return true, nil
+ }
+ // Check descendants: the presence of any descendant implicitly
+ // represents a mutation of its ancestor.
+ return bitmap[2+2*n] != 0 || bitmap[3+2*n] != 0, nil
+ }
+ default:
+ return false, fmt.Errorf("unsupported bitmap size %d", len(bitmap))
+ }
+}
+
// blockIterator is the iterator to traverse the indices within a single block.
type blockIterator struct {
// immutable fields
data []byte // Reference to the data segment within the block reader
restarts []uint16 // Offsets pointing to the restart sections within the data
+ hasExt bool // Flag whether the extension is included in the data
+
+ // Optional extension filter
+ filter *extFilter // Filters index entries based on the extension field.
// mutable fields
id uint64 // ID of the element at the iterators current position
+ ext []byte // Extension field of the element at the iterators current position
dataPtr int // Current read position within the data slice
restartPtr int // Index of the restart section where the iterator is currently positioned
exhausted bool // Flag whether the iterator has been exhausted
err error // Accumulated error during the traversal
}
-func newBlockIterator(data []byte, restarts []uint16) *blockIterator {
+func (br *blockReader) newIterator(filter *extFilter) *blockIterator {
it := &blockIterator{
- data: data, // hold the slice directly with no deep copy
- restarts: restarts, // hold the slice directly with no deep copy
+ data: br.data, // hold the slice directly with no deep copy
+ restarts: br.restarts, // hold the slice directly with no deep copy
+ hasExt: br.hasExt, // flag whether the extension should be resolved
+ filter: filter, // optional extension filter
}
it.reset()
return it
}
-func (it *blockIterator) set(dataPtr int, restartPtr int, id uint64) {
+func (it *blockIterator) set(dataPtr int, restartPtr int, id uint64, ext []byte) {
it.id = id
+ it.ext = ext
+
it.dataPtr = dataPtr
it.restartPtr = restartPtr
it.exhausted = dataPtr == len(it.data)
@@ -79,6 +181,8 @@ func (it *blockIterator) setErr(err error) {
func (it *blockIterator) reset() {
it.id = 0
+ it.ext = nil
+
it.dataPtr = -1
it.restartPtr = -1
it.exhausted = false
@@ -90,12 +194,26 @@ func (it *blockIterator) reset() {
}
}
-// SeekGT moves the iterator to the first element whose id is greater than the
+func (it *blockIterator) resolveExt(pos int) ([]byte, int, error) {
+ if !it.hasExt {
+ return nil, 0, nil
+ }
+ length, n := binary.Uvarint(it.data[pos:])
+ if n <= 0 {
+ return nil, 0, fmt.Errorf("too short for extension, pos: %d, datalen: %d", pos, len(it.data))
+ }
+ if len(it.data[pos+n:]) < int(length) {
+ return nil, 0, fmt.Errorf("too short for extension, pos: %d, length: %d, datalen: %d", pos, length, len(it.data))
+ }
+ return it.data[pos+n : pos+n+int(length)], n + int(length), nil
+}
+
+// seekGT moves the iterator to the first element whose id is greater than the
// given number. It returns whether such element exists.
//
// Note, this operation will unset the exhausted status and subsequent traversal
// is allowed.
-func (it *blockIterator) SeekGT(id uint64) bool {
+func (it *blockIterator) seekGT(id uint64) bool {
if it.err != nil {
return false
}
@@ -112,11 +230,20 @@ func (it *blockIterator) SeekGT(id uint64) bool {
return false
}
if index == 0 {
- item, n := binary.Uvarint(it.data[it.restarts[0]:])
+ pos := int(it.restarts[0])
+ item, n := binary.Uvarint(it.data[pos:])
+ if n <= 0 {
+ it.setErr(fmt.Errorf("failed to decode item at pos %d", it.restarts[0]))
+ return false
+ }
+ pos = pos + n
- // If the restart size is 1, then the restart pointer shouldn't be 0.
- // It's not practical and should be denied in the first place.
- it.set(int(it.restarts[0])+n, 0, item)
+ ext, shift, err := it.resolveExt(pos)
+ if err != nil {
+ it.setErr(err)
+ return false
+ }
+ it.set(pos+shift, 0, item, ext)
return true
}
var (
@@ -154,11 +281,18 @@ func (it *blockIterator) SeekGT(id uint64) bool {
}
pos += n
+ ext, shift, err := it.resolveExt(pos)
+ if err != nil {
+ it.setErr(err)
+ return false
+ }
+ pos += shift
+
if result > id {
if pos == limit {
- it.set(pos, restartIndex+1, result)
+ it.set(pos, restartIndex+1, result, ext)
} else {
- it.set(pos, restartIndex, result)
+ it.set(pos, restartIndex, result, ext)
}
return true
}
@@ -170,8 +304,45 @@ func (it *blockIterator) SeekGT(id uint64) bool {
}
// The element which is the first one greater than the specified id
// is exactly the one located at the restart point.
- item, n := binary.Uvarint(it.data[it.restarts[index]:])
- it.set(int(it.restarts[index])+n, index, item)
+ pos = int(it.restarts[index])
+ item, n := binary.Uvarint(it.data[pos:])
+ if n <= 0 {
+ it.setErr(fmt.Errorf("failed to decode item at pos %d", it.restarts[index]))
+ return false
+ }
+ pos = pos + n
+
+ ext, shift, err := it.resolveExt(pos)
+ if err != nil {
+ it.setErr(err)
+ return false
+ }
+ it.set(pos+shift, index, item, ext)
+ return true
+}
+
+// SeekGT implements HistoryIndexIterator. It is a wrapper around seekGT with
+// the optional extension filter logic applied.
+func (it *blockIterator) SeekGT(id uint64) bool {
+ if !it.seekGT(id) {
+ return false
+ }
+ if it.filter == nil {
+ return true
+ }
+ for {
+ found, err := it.filter.exists(it.ext)
+ if err != nil {
+ it.setErr(err)
+ return false
+ }
+ if found {
+ break
+ }
+ if !it.next() {
+ return false
+ }
+ }
return true
}
@@ -183,10 +354,9 @@ func (it *blockIterator) init() {
it.restartPtr = 0
}
-// Next implements the HistoryIndexIterator, moving the iterator to the next
-// element. If the iterator has been exhausted, and boolean with false should
-// be returned.
-func (it *blockIterator) Next() bool {
+// next moves the iterator to the next element. It returns false if the
+// iterator has been exhausted.
+func (it *blockIterator) next() bool {
if it.exhausted || it.err != nil {
return false
}
@@ -198,7 +368,6 @@ func (it *blockIterator) Next() bool {
it.setErr(fmt.Errorf("failed to decode item at pos %d", it.dataPtr))
return false
}
-
var val uint64
if it.dataPtr == int(it.restarts[it.restartPtr]) {
val = v
@@ -206,16 +375,48 @@ func (it *blockIterator) Next() bool {
val = it.id + v
}
+ // Decode the extension field
+ ext, shift, err := it.resolveExt(it.dataPtr + n)
+ if err != nil {
+ it.setErr(err)
+ return false
+ }
+
// Move to the next restart section if the data pointer crosses the boundary
nextRestartPtr := it.restartPtr
- if it.restartPtr < len(it.restarts)-1 && it.dataPtr+n == int(it.restarts[it.restartPtr+1]) {
+ if it.restartPtr < len(it.restarts)-1 && it.dataPtr+n+shift == int(it.restarts[it.restartPtr+1]) {
nextRestartPtr = it.restartPtr + 1
}
- it.set(it.dataPtr+n, nextRestartPtr, val)
+ it.set(it.dataPtr+n+shift, nextRestartPtr, val, ext)
return true
}
+// Next implements the HistoryIndexIterator, moving the iterator to the next
+// element. It's a wrapper of next with optional extension filter logic applied.
+func (it *blockIterator) Next() bool {
+ if !it.next() {
+ return false
+ }
+ if it.filter == nil {
+ return true
+ }
+ for {
+ found, err := it.filter.exists(it.ext)
+ if err != nil {
+ it.setErr(err)
+ return false
+ }
+ if found {
+ break
+ }
+ if !it.next() {
+ return false
+ }
+ }
+ return true
+}
+
// ID implements HistoryIndexIterator, returning the id of the element where the
// iterator is positioned at.
func (it *blockIterator) ID() uint64 {
@@ -226,15 +427,15 @@ func (it *blockIterator) ID() uint64 {
// Exhausting all the elements is not considered to be an error.
func (it *blockIterator) Error() error { return it.err }
-// blockLoader defines the method to retrieve the specific block for reading.
-type blockLoader func(id uint32) (*blockReader, error)
-
// indexIterator is an iterator to traverse the history indices belonging to the
// specific state entry.
type indexIterator struct {
// immutable fields
descList []*indexBlockDesc
- loader blockLoader
+ reader *indexReader
+
+ // Optional extension filter
+ filter *extFilter
// mutable fields
blockIt *blockIterator
@@ -243,10 +444,26 @@ type indexIterator struct {
err error
}
-func newIndexIterator(descList []*indexBlockDesc, loader blockLoader) *indexIterator {
+// newBlockIter initializes the block iterator with the specified block ID.
+func (r *indexReader) newBlockIter(id uint32, filter *extFilter) (*blockIterator, error) {
+ br, ok := r.readers[id]
+ if !ok {
+ var err error
+ br, err = newBlockReader(readStateIndexBlock(r.state, r.db, id), r.bitmapSize != 0)
+ if err != nil {
+ return nil, err
+ }
+ r.readers[id] = br
+ }
+ return br.newIterator(filter), nil
+}
+
+// newIterator initializes the index iterator with the specified extension filter.
+func (r *indexReader) newIterator(filter *extFilter) *indexIterator {
it := &indexIterator{
- descList: descList,
- loader: loader,
+ descList: r.descList,
+ reader: r,
+ filter: filter,
}
it.reset()
return it
@@ -271,16 +488,32 @@ func (it *indexIterator) reset() {
}
func (it *indexIterator) open(blockPtr int) error {
- id := it.descList[blockPtr].id
- br, err := it.loader(id)
+ blockIt, err := it.reader.newBlockIter(it.descList[blockPtr].id, it.filter)
if err != nil {
return err
}
- it.blockIt = newBlockIterator(br.data, br.restarts)
+ it.blockIt = blockIt
it.blockPtr = blockPtr
return nil
}
+func (it *indexIterator) applyFilter(index int) (int, error) {
+ if it.filter == nil {
+ return index, nil
+ }
+ for index < len(it.descList) {
+ found, err := it.filter.contains(it.descList[index].extBitmap)
+ if err != nil {
+ return 0, err
+ }
+ if found {
+ break
+ }
+ index++
+ }
+ return index, nil
+}
+
// SeekGT moves the iterator to the first element whose id is greater than the
// given number. It returns whether such element exists.
//
@@ -293,6 +526,11 @@ func (it *indexIterator) SeekGT(id uint64) bool {
index := sort.Search(len(it.descList), func(i int) bool {
return id < it.descList[i].max
})
+ index, err := it.applyFilter(index)
+ if err != nil {
+ it.setErr(err)
+ return false
+ }
if index == len(it.descList) {
return false
}
@@ -304,7 +542,13 @@ func (it *indexIterator) SeekGT(id uint64) bool {
return false
}
}
- return it.blockIt.SeekGT(id)
+ // Terminate if the element which is greater than the id can be found in the
+ // last block; otherwise move to the next block. It may happen that all the
+ // target elements in this block are less than id.
+ if it.blockIt.SeekGT(id) {
+ return true
+ }
+ return it.Next()
}
func (it *indexIterator) init() error {
@@ -325,15 +569,23 @@ func (it *indexIterator) Next() bool {
it.setErr(err)
return false
}
-
if it.blockIt.Next() {
return true
}
- if it.blockPtr == len(it.descList)-1 {
+ it.blockPtr++
+
+ index, err := it.applyFilter(it.blockPtr)
+ if err != nil {
+ it.setErr(err)
+ return false
+ }
+ it.blockPtr = index
+
+ if it.blockPtr == len(it.descList) {
it.exhausted = true
return false
}
- if err := it.open(it.blockPtr + 1); err != nil {
+ if err := it.open(it.blockPtr); err != nil {
it.setErr(err)
return false
}
diff --git a/triedb/pathdb/history_index_iterator_test.go b/triedb/pathdb/history_index_iterator_test.go
index f0dd3fee4a..8b7591ce26 100644
--- a/triedb/pathdb/history_index_iterator_test.go
+++ b/triedb/pathdb/history_index_iterator_test.go
@@ -19,7 +19,9 @@ package pathdb
import (
"errors"
"fmt"
+ "maps"
"math/rand"
+ "slices"
"sort"
"testing"
@@ -28,12 +30,30 @@ import (
"github.com/ethereum/go-ethereum/ethdb"
)
-func makeTestIndexBlock(count int) ([]byte, []uint64) {
+func checkExt(f *extFilter, ext []uint16) bool {
+ if f == nil {
+ return true
+ }
+ fn := uint16(*f)
+
+ for _, n := range ext {
+ if n == fn {
+ return true
+ }
+ if isAncestor(fn, n) {
+ return true
+ }
+ }
+ return false
+}
+
+func makeTestIndexBlock(count int, bitmapSize int) ([]byte, []uint64, [][]uint16) {
var (
marks = make(map[uint64]bool)
- elements []uint64
+ elements = make([]uint64, 0, count)
+ extList = make([][]uint16, 0, count)
)
- bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0)
+ bw, _ := newBlockWriter(nil, newIndexBlockDesc(0, bitmapSize), 0, bitmapSize != 0)
for i := 0; i < count; i++ {
n := uint64(rand.Uint32())
if marks[n] {
@@ -45,17 +65,20 @@ func makeTestIndexBlock(count int) ([]byte, []uint64) {
sort.Slice(elements, func(i, j int) bool { return elements[i] < elements[j] })
for i := 0; i < len(elements); i++ {
- bw.append(elements[i])
+ ext := randomExt(bitmapSize, 5)
+ extList = append(extList, ext)
+ bw.append(elements[i], ext)
}
data := bw.finish()
- return data, elements
+ return data, elements, extList
}
-func makeTestIndexBlocks(db ethdb.KeyValueStore, stateIdent stateIdent, count int) []uint64 {
+func makeTestIndexBlocks(db ethdb.KeyValueStore, stateIdent stateIdent, count int, bitmapSize int) ([]uint64, [][]uint16) {
var (
marks = make(map[uint64]bool)
elements []uint64
+ extList [][]uint16
)
for i := 0; i < count; i++ {
n := uint64(rand.Uint32())
@@ -67,15 +90,17 @@ func makeTestIndexBlocks(db ethdb.KeyValueStore, stateIdent stateIdent, count in
}
sort.Slice(elements, func(i, j int) bool { return elements[i] < elements[j] })
- iw, _ := newIndexWriter(db, stateIdent, 0)
+ iw, _ := newIndexWriter(db, stateIdent, 0, bitmapSize)
for i := 0; i < len(elements); i++ {
- iw.append(elements[i])
+ ext := randomExt(bitmapSize, 5)
+ extList = append(extList, ext)
+ iw.append(elements[i], ext)
}
batch := db.NewBatch()
iw.finish(batch)
batch.Write()
- return elements
+ return elements, extList
}
func checkSeekGT(it HistoryIndexIterator, input uint64, exp bool, expVal uint64) error {
@@ -113,43 +138,40 @@ func checkNext(it HistoryIndexIterator, values []uint64) error {
return it.Error()
}
-func TestBlockIteratorSeekGT(t *testing.T) {
- /* 0-size index block is not allowed
-
- data, elements := makeTestIndexBlock(0)
- testBlockIterator(t, data, elements)
- */
-
- data, elements := makeTestIndexBlock(1)
- testBlockIterator(t, data, elements)
-
- data, elements = makeTestIndexBlock(indexBlockRestartLen)
- testBlockIterator(t, data, elements)
-
- data, elements = makeTestIndexBlock(3 * indexBlockRestartLen)
- testBlockIterator(t, data, elements)
-
- data, elements = makeTestIndexBlock(indexBlockEntriesCap)
- testBlockIterator(t, data, elements)
-}
-
-func testBlockIterator(t *testing.T, data []byte, elements []uint64) {
- br, err := newBlockReader(data)
- if err != nil {
- t.Fatalf("Failed to open the block for reading, %v", err)
+func verifySeekGT(t *testing.T, elements []uint64, ext [][]uint16, newIter func(filter *extFilter) HistoryIndexIterator) {
+ set := make(map[extFilter]bool)
+ for _, extList := range ext {
+ for _, f := range extList {
+ set[extFilter(f)] = true
+ }
}
- it := newBlockIterator(br.data, br.restarts)
+ filters := slices.Collect(maps.Keys(set))
for i := 0; i < 128; i++ {
+ var filter *extFilter
+ if rand.Intn(2) == 0 && len(filters) > 0 {
+ filter = &filters[rand.Intn(len(filters))]
+ } else {
+ filter = nil
+ }
+
var input uint64
if rand.Intn(2) == 0 {
input = elements[rand.Intn(len(elements))]
} else {
input = uint64(rand.Uint32())
}
+
index := sort.Search(len(elements), func(i int) bool {
return elements[i] > input
})
+ for index < len(elements) {
+ if checkExt(filter, ext[index]) {
+ break
+ }
+ index++
+ }
+
var (
exp bool
expVal uint64
@@ -160,10 +182,17 @@ func testBlockIterator(t *testing.T, data []byte, elements []uint64) {
} else {
exp = true
expVal = elements[index]
- if index < len(elements) {
- remains = elements[index+1:]
+
+ index++
+ for index < len(elements) {
+ if checkExt(filter, ext[index]) {
+ remains = append(remains, elements[index])
+ }
+ index++
}
}
+
+ it := newIter(filter)
if err := checkSeekGT(it, input, exp, expVal); err != nil {
t.Fatal(err)
}
@@ -175,62 +204,71 @@ func testBlockIterator(t *testing.T, data []byte, elements []uint64) {
}
}
+func verifyTraversal(t *testing.T, elements []uint64, ext [][]uint16, newIter func(filter *extFilter) HistoryIndexIterator) {
+ set := make(map[extFilter]bool)
+ for _, extList := range ext {
+ for _, f := range extList {
+ set[extFilter(f)] = true
+ }
+ }
+ filters := slices.Collect(maps.Keys(set))
+
+ for i := 0; i < 16; i++ {
+ var filter *extFilter
+ if len(filters) > 0 {
+ filter = &filters[rand.Intn(len(filters))]
+ } else {
+ filter = nil
+ }
+ it := newIter(filter)
+
+ var (
+ pos int
+ exp []uint64
+ )
+ for pos < len(elements) {
+ if checkExt(filter, ext[pos]) {
+ exp = append(exp, elements[pos])
+ }
+ pos++
+ }
+ if err := checkNext(it, exp); err != nil {
+ t.Fatal(err)
+ }
+ }
+}
+
+func TestBlockIteratorSeekGT(t *testing.T) {
+ for _, size := range []int{0, 2, 34} {
+ for _, n := range []int{1, indexBlockRestartLen, 3 * indexBlockRestartLen} {
+ data, elements, ext := makeTestIndexBlock(n, size)
+
+ verifySeekGT(t, elements, ext, func(filter *extFilter) HistoryIndexIterator {
+ br, err := newBlockReader(data, size != 0)
+ if err != nil {
+ t.Fatalf("Failed to open the block for reading, %v", err)
+ }
+ return br.newIterator(filter)
+ })
+ }
+ }
+}
+
func TestIndexIteratorSeekGT(t *testing.T) {
ident := newAccountIdent(common.Hash{0x1})
- dbA := rawdb.NewMemoryDatabase()
- testIndexIterator(t, ident, dbA, makeTestIndexBlocks(dbA, ident, 1))
+ for _, size := range []int{0, 2, 34} {
+ for _, n := range []int{1, 4096, 3 * 4096} {
+ db := rawdb.NewMemoryDatabase()
+ elements, ext := makeTestIndexBlocks(db, ident, n, size)
- dbB := rawdb.NewMemoryDatabase()
- testIndexIterator(t, ident, dbB, makeTestIndexBlocks(dbB, ident, 3*indexBlockEntriesCap))
-
- dbC := rawdb.NewMemoryDatabase()
- testIndexIterator(t, ident, dbC, makeTestIndexBlocks(dbC, ident, indexBlockEntriesCap-1))
-
- dbD := rawdb.NewMemoryDatabase()
- testIndexIterator(t, ident, dbD, makeTestIndexBlocks(dbD, ident, indexBlockEntriesCap+1))
-}
-
-func testIndexIterator(t *testing.T, stateIdent stateIdent, db ethdb.Database, elements []uint64) {
- ir, err := newIndexReader(db, stateIdent)
- if err != nil {
- t.Fatalf("Failed to open the index reader, %v", err)
- }
- it := newIndexIterator(ir.descList, func(id uint32) (*blockReader, error) {
- return newBlockReader(readStateIndexBlock(stateIdent, db, id))
- })
-
- for i := 0; i < 128; i++ {
- var input uint64
- if rand.Intn(2) == 0 {
- input = elements[rand.Intn(len(elements))]
- } else {
- input = uint64(rand.Uint32())
- }
- index := sort.Search(len(elements), func(i int) bool {
- return elements[i] > input
- })
- var (
- exp bool
- expVal uint64
- remains []uint64
- )
- if index == len(elements) {
- exp = false
- } else {
- exp = true
- expVal = elements[index]
- if index < len(elements) {
- remains = elements[index+1:]
- }
- }
- if err := checkSeekGT(it, input, exp, expVal); err != nil {
- t.Fatal(err)
- }
- if exp {
- if err := checkNext(it, remains); err != nil {
- t.Fatal(err)
- }
+ verifySeekGT(t, elements, ext, func(filter *extFilter) HistoryIndexIterator {
+ ir, err := newIndexReader(db, ident, size)
+ if err != nil {
+ t.Fatalf("Failed to open the index reader, %v", err)
+ }
+ return ir.newIterator(filter)
+ })
}
}
}
@@ -242,56 +280,36 @@ func TestBlockIteratorTraversal(t *testing.T) {
testBlockIterator(t, data, elements)
*/
- data, elements := makeTestIndexBlock(1)
- testBlockIteratorTraversal(t, data, elements)
+ for _, size := range []int{0, 2, 34} {
+ for _, n := range []int{1, indexBlockRestartLen, 3 * indexBlockRestartLen} {
+ data, elements, ext := makeTestIndexBlock(n, size)
- data, elements = makeTestIndexBlock(indexBlockRestartLen)
- testBlockIteratorTraversal(t, data, elements)
-
- data, elements = makeTestIndexBlock(3 * indexBlockRestartLen)
- testBlockIteratorTraversal(t, data, elements)
-
- data, elements = makeTestIndexBlock(indexBlockEntriesCap)
- testBlockIteratorTraversal(t, data, elements)
-}
-
-func testBlockIteratorTraversal(t *testing.T, data []byte, elements []uint64) {
- br, err := newBlockReader(data)
- if err != nil {
- t.Fatalf("Failed to open the block for reading, %v", err)
- }
- it := newBlockIterator(br.data, br.restarts)
-
- if err := checkNext(it, elements); err != nil {
- t.Fatal(err)
+ verifyTraversal(t, elements, ext, func(filter *extFilter) HistoryIndexIterator {
+ br, err := newBlockReader(data, size != 0)
+ if err != nil {
+ t.Fatalf("Failed to open the block for reading, %v", err)
+ }
+ return br.newIterator(filter)
+ })
+ }
}
}
func TestIndexIteratorTraversal(t *testing.T) {
ident := newAccountIdent(common.Hash{0x1})
- dbA := rawdb.NewMemoryDatabase()
- testIndexIteratorTraversal(t, ident, dbA, makeTestIndexBlocks(dbA, ident, 1))
+ for _, size := range []int{0, 2, 34} {
+ for _, n := range []int{1, 4096, 3 * 4096} {
+ db := rawdb.NewMemoryDatabase()
+ elements, ext := makeTestIndexBlocks(db, ident, n, size)
- dbB := rawdb.NewMemoryDatabase()
- testIndexIteratorTraversal(t, ident, dbB, makeTestIndexBlocks(dbB, ident, 3*indexBlockEntriesCap))
-
- dbC := rawdb.NewMemoryDatabase()
- testIndexIteratorTraversal(t, ident, dbC, makeTestIndexBlocks(dbC, ident, indexBlockEntriesCap-1))
-
- dbD := rawdb.NewMemoryDatabase()
- testIndexIteratorTraversal(t, ident, dbD, makeTestIndexBlocks(dbD, ident, indexBlockEntriesCap+1))
-}
-
-func testIndexIteratorTraversal(t *testing.T, stateIdent stateIdent, db ethdb.KeyValueReader, elements []uint64) {
- ir, err := newIndexReader(db, stateIdent)
- if err != nil {
- t.Fatalf("Failed to open the index reader, %v", err)
- }
- it := newIndexIterator(ir.descList, func(id uint32) (*blockReader, error) {
- return newBlockReader(readStateIndexBlock(stateIdent, db, id))
- })
- if err := checkNext(it, elements); err != nil {
- t.Fatal(err)
+ verifyTraversal(t, elements, ext, func(filter *extFilter) HistoryIndexIterator {
+ ir, err := newIndexReader(db, ident, size)
+ if err != nil {
+ t.Fatalf("Failed to open the index reader, %v", err)
+ }
+ return ir.newIterator(filter)
+ })
+ }
}
}
diff --git a/triedb/pathdb/history_index_test.go b/triedb/pathdb/history_index_test.go
index 42cb04b001..2644db46b5 100644
--- a/triedb/pathdb/history_index_test.go
+++ b/triedb/pathdb/history_index_test.go
@@ -29,19 +29,25 @@ import (
)
func TestIndexReaderBasic(t *testing.T) {
+ testIndexReaderBasic(t, 0)
+ testIndexReaderBasic(t, 2)
+ testIndexReaderBasic(t, 34)
+}
+
+func testIndexReaderBasic(t *testing.T, bitmapSize int) {
elements := []uint64{
1, 5, 10, 11, 20,
}
db := rawdb.NewMemoryDatabase()
- bw, _ := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), 0)
+ bw, _ := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), 0, bitmapSize)
for i := 0; i < len(elements); i++ {
- bw.append(elements[i])
+ bw.append(elements[i], randomExt(bitmapSize, 5))
}
batch := db.NewBatch()
bw.finish(batch)
batch.Write()
- br, err := newIndexReader(db, newAccountIdent(common.Hash{0xa}))
+ br, err := newIndexReader(db, newAccountIdent(common.Hash{0xa}), bitmapSize)
if err != nil {
t.Fatalf("Failed to construct the index reader, %v", err)
}
@@ -68,22 +74,28 @@ func TestIndexReaderBasic(t *testing.T) {
}
func TestIndexReaderLarge(t *testing.T) {
+ testIndexReaderLarge(t, 0)
+ testIndexReaderLarge(t, 2)
+ testIndexReaderLarge(t, 34)
+}
+
+func testIndexReaderLarge(t *testing.T, bitmapSize int) {
var elements []uint64
- for i := 0; i < 10*indexBlockEntriesCap; i++ {
+ for i := 0; i < 10*4096; i++ {
elements = append(elements, rand.Uint64())
}
slices.Sort(elements)
db := rawdb.NewMemoryDatabase()
- bw, _ := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), 0)
+ bw, _ := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), 0, bitmapSize)
for i := 0; i < len(elements); i++ {
- bw.append(elements[i])
+ bw.append(elements[i], randomExt(bitmapSize, 5))
}
batch := db.NewBatch()
bw.finish(batch)
batch.Write()
- br, err := newIndexReader(db, newAccountIdent(common.Hash{0xa}))
+ br, err := newIndexReader(db, newAccountIdent(common.Hash{0xa}), bitmapSize)
if err != nil {
t.Fatalf("Failed to construct the index reader, %v", err)
}
@@ -107,7 +119,7 @@ func TestIndexReaderLarge(t *testing.T) {
}
func TestEmptyIndexReader(t *testing.T) {
- br, err := newIndexReader(rawdb.NewMemoryDatabase(), newAccountIdent(common.Hash{0xa}))
+ br, err := newIndexReader(rawdb.NewMemoryDatabase(), newAccountIdent(common.Hash{0xa}), 0)
if err != nil {
t.Fatalf("Failed to construct the index reader, %v", err)
}
@@ -121,27 +133,33 @@ func TestEmptyIndexReader(t *testing.T) {
}
func TestIndexWriterBasic(t *testing.T) {
+ testIndexWriterBasic(t, 0)
+ testIndexWriterBasic(t, 2)
+ testIndexWriterBasic(t, 34)
+}
+
+func testIndexWriterBasic(t *testing.T, bitmapSize int) {
db := rawdb.NewMemoryDatabase()
- iw, _ := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), 0)
- iw.append(2)
- if err := iw.append(1); err == nil {
+ iw, _ := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), 0, bitmapSize)
+ iw.append(2, randomExt(bitmapSize, 5))
+ if err := iw.append(1, randomExt(bitmapSize, 5)); err == nil {
t.Fatal("out-of-order insertion is not expected")
}
var maxElem uint64
for i := 0; i < 10; i++ {
- iw.append(uint64(i + 3))
+ iw.append(uint64(i+3), randomExt(bitmapSize, 5))
maxElem = uint64(i + 3)
}
batch := db.NewBatch()
iw.finish(batch)
batch.Write()
- iw, err := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), maxElem)
+ iw, err := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), maxElem, bitmapSize)
if err != nil {
t.Fatalf("Failed to construct the block writer, %v", err)
}
for i := 0; i < 10; i++ {
- if err := iw.append(uint64(i + 100)); err != nil {
+ if err := iw.append(uint64(i+100), randomExt(bitmapSize, 5)); err != nil {
t.Fatalf("Failed to append item, %v", err)
}
}
@@ -149,61 +167,37 @@ func TestIndexWriterBasic(t *testing.T) {
}
func TestIndexWriterWithLimit(t *testing.T) {
- db := rawdb.NewMemoryDatabase()
- iw, _ := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), 0)
+ testIndexWriterWithLimit(t, 0)
+ testIndexWriterWithLimit(t, 2)
+ testIndexWriterWithLimit(t, 34)
+}
- var maxElem uint64
- for i := 0; i < indexBlockEntriesCap*2; i++ {
- iw.append(uint64(i + 1))
- maxElem = uint64(i + 1)
+func testIndexWriterWithLimit(t *testing.T, bitmapSize int) {
+ db := rawdb.NewMemoryDatabase()
+ iw, _ := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), 0, bitmapSize)
+
+ // 200 iterations (with around 50 bytes extension) is enough to cross
+ // the block boundary (4096 bytes)
+ for i := 0; i < 200; i++ {
+ iw.append(uint64(i+1), randomExt(bitmapSize, 50))
}
batch := db.NewBatch()
iw.finish(batch)
batch.Write()
- suites := []struct {
- limit uint64
- expMax uint64
- }{
- // nothing to truncate
- {
- maxElem, maxElem,
- },
- // truncate the last element
- {
- maxElem - 1, maxElem - 1,
- },
- // truncation around the block boundary
- {
- uint64(indexBlockEntriesCap + 1),
- uint64(indexBlockEntriesCap + 1),
- },
- // truncation around the block boundary
- {
- uint64(indexBlockEntriesCap),
- uint64(indexBlockEntriesCap),
- },
- {
- uint64(1), uint64(1),
- },
- // truncate the entire index, it's in theory invalid
- {
- uint64(0), uint64(0),
- },
- }
- for i, suite := range suites {
- iw, err := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), suite.limit)
+ for i := 0; i < 200; i++ {
+ limit := uint64(i + 1)
+ iw, err := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), limit, bitmapSize)
if err != nil {
t.Fatalf("Failed to construct the index writer, %v", err)
}
- if iw.lastID != suite.expMax {
- t.Fatalf("Test %d, unexpected max value, got %d, want %d", i, iw.lastID, suite.expMax)
+ if iw.lastID != limit {
+ t.Fatalf("Test %d, unexpected max value, got %d, want %d", i, iw.lastID, limit)
}
-
// Re-fill the elements
var maxElem uint64
- for elem := suite.limit + 1; elem < indexBlockEntriesCap*4; elem++ {
- if err := iw.append(elem); err != nil {
+ for elem := limit + 1; elem < 500; elem++ {
+ if err := iw.append(elem, randomExt(bitmapSize, 5)); err != nil {
t.Fatalf("Failed to append value %d: %v", elem, err)
}
maxElem = elem
@@ -215,12 +209,20 @@ func TestIndexWriterWithLimit(t *testing.T) {
}
func TestIndexDeleterBasic(t *testing.T) {
- db := rawdb.NewMemoryDatabase()
- iw, _ := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), 0)
+ testIndexDeleterBasic(t, 0)
+ testIndexDeleterBasic(t, 2)
+ testIndexDeleterBasic(t, 34)
+}
+func testIndexDeleterBasic(t *testing.T, bitmapSize int) {
+ db := rawdb.NewMemoryDatabase()
+ iw, _ := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), 0, bitmapSize)
+
+ // 200 iterations (with around 50 bytes extension) is enough to cross
+ // the block boundary (4096 bytes)
var maxElem uint64
- for i := 0; i < indexBlockEntriesCap*4; i++ {
- iw.append(uint64(i + 1))
+ for i := 0; i < 200; i++ {
+ iw.append(uint64(i+1), randomExt(bitmapSize, 50))
maxElem = uint64(i + 1)
}
batch := db.NewBatch()
@@ -228,11 +230,11 @@ func TestIndexDeleterBasic(t *testing.T) {
batch.Write()
// Delete unknown id, the request should be rejected
- id, _ := newIndexDeleter(db, newAccountIdent(common.Hash{0xa}), maxElem)
- if err := id.pop(indexBlockEntriesCap * 5); err == nil {
+ id, _ := newIndexDeleter(db, newAccountIdent(common.Hash{0xa}), maxElem, bitmapSize)
+ if err := id.pop(500); err == nil {
t.Fatal("Expect error to occur for unknown id")
}
- for i := indexBlockEntriesCap * 4; i >= 1; i-- {
+ for i := 200; i >= 1; i-- {
if err := id.pop(uint64(i)); err != nil {
t.Fatalf("Unexpected error for element popping, %v", err)
}
@@ -243,57 +245,33 @@ func TestIndexDeleterBasic(t *testing.T) {
}
func TestIndexDeleterWithLimit(t *testing.T) {
- db := rawdb.NewMemoryDatabase()
- iw, _ := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), 0)
+ testIndexDeleterWithLimit(t, 0)
+ testIndexDeleterWithLimit(t, 2)
+ testIndexDeleterWithLimit(t, 34)
+}
- var maxElem uint64
- for i := 0; i < indexBlockEntriesCap*2; i++ {
- iw.append(uint64(i + 1))
- maxElem = uint64(i + 1)
+func testIndexDeleterWithLimit(t *testing.T, bitmapSize int) {
+ db := rawdb.NewMemoryDatabase()
+ iw, _ := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), 0, bitmapSize)
+
+ // 200 iterations (with around 50 bytes extension) is enough to cross
+ // the block boundary (4096 bytes)
+ for i := 0; i < 200; i++ {
+ iw.append(uint64(i+1), randomExt(bitmapSize, 50))
}
batch := db.NewBatch()
iw.finish(batch)
batch.Write()
- suites := []struct {
- limit uint64
- expMax uint64
- }{
- // nothing to truncate
- {
- maxElem, maxElem,
- },
- // truncate the last element
- {
- maxElem - 1, maxElem - 1,
- },
- // truncation around the block boundary
- {
- uint64(indexBlockEntriesCap + 1),
- uint64(indexBlockEntriesCap + 1),
- },
- // truncation around the block boundary
- {
- uint64(indexBlockEntriesCap),
- uint64(indexBlockEntriesCap),
- },
- {
- uint64(1), uint64(1),
- },
- // truncate the entire index, it's in theory invalid
- {
- uint64(0), uint64(0),
- },
- }
- for i, suite := range suites {
- id, err := newIndexDeleter(db, newAccountIdent(common.Hash{0xa}), suite.limit)
+ for i := 0; i < 200; i++ {
+ limit := uint64(i + 1)
+ id, err := newIndexDeleter(db, newAccountIdent(common.Hash{0xa}), limit, bitmapSize)
if err != nil {
t.Fatalf("Failed to construct the index writer, %v", err)
}
- if id.lastID != suite.expMax {
- t.Fatalf("Test %d, unexpected max value, got %d, want %d", i, id.lastID, suite.expMax)
+ if id.lastID != limit {
+ t.Fatalf("Test %d, unexpected max value, got %d, want %d", i, id.lastID, limit)
}
-
// Keep removing elements
for elem := id.lastID; elem > 0; elem-- {
if err := id.pop(elem); err != nil {
@@ -339,7 +317,7 @@ func TestBatchIndexerWrite(t *testing.T) {
}
}
for addrHash, indexes := range accounts {
- ir, _ := newIndexReader(db, newAccountIdent(addrHash))
+ ir, _ := newIndexReader(db, newAccountIdent(addrHash), 0)
for i := 0; i < len(indexes)-1; i++ {
n, err := ir.readGreaterThan(indexes[i])
if err != nil {
@@ -359,7 +337,7 @@ func TestBatchIndexerWrite(t *testing.T) {
}
for addrHash, slots := range storages {
for slotHash, indexes := range slots {
- ir, _ := newIndexReader(db, newStorageIdent(addrHash, slotHash))
+ ir, _ := newIndexReader(db, newStorageIdent(addrHash, slotHash), 0)
for i := 0; i < len(indexes)-1; i++ {
n, err := ir.readGreaterThan(indexes[i])
if err != nil {
diff --git a/triedb/pathdb/history_indexer.go b/triedb/pathdb/history_indexer.go
index 9af7a96dc6..ddb4a293cc 100644
--- a/triedb/pathdb/history_indexer.go
+++ b/triedb/pathdb/history_indexer.go
@@ -34,7 +34,8 @@ import (
const (
// The batch size for reading state histories
- historyReadBatch = 1000
+ historyReadBatch = 1000
+ historyIndexBatch = 8 * 1024 * 1024 // The number of state history indexes for constructing or deleting as batch
stateHistoryIndexV0 = uint8(0) // initial version of state index structure
stateHistoryIndexVersion = stateHistoryIndexV0 // the current state index version
@@ -191,12 +192,12 @@ func (b *batchIndexer) finish(force bool) error {
for ident, list := range b.index {
eg.Go(func() error {
if !b.delete {
- iw, err := newIndexWriter(b.db, ident, indexed)
+ iw, err := newIndexWriter(b.db, ident, indexed, 0)
if err != nil {
return err
}
for _, n := range list {
- if err := iw.append(n); err != nil {
+ if err := iw.append(n, nil); err != nil {
return err
}
}
@@ -204,7 +205,7 @@ func (b *batchIndexer) finish(force bool) error {
iw.finish(batch)
})
} else {
- id, err := newIndexDeleter(b.db, ident, indexed)
+ id, err := newIndexDeleter(b.db, ident, indexed, 0)
if err != nil {
return err
}
diff --git a/triedb/pathdb/history_reader.go b/triedb/pathdb/history_reader.go
index 1bf4cf648d..69e7d5bd22 100644
--- a/triedb/pathdb/history_reader.go
+++ b/triedb/pathdb/history_reader.go
@@ -40,8 +40,8 @@ type indexReaderWithLimitTag struct {
}
// newIndexReaderWithLimitTag constructs a index reader with indexing position.
-func newIndexReaderWithLimitTag(db ethdb.KeyValueReader, state stateIdent, limit uint64) (*indexReaderWithLimitTag, error) {
- r, err := newIndexReader(db, state)
+func newIndexReaderWithLimitTag(db ethdb.KeyValueReader, state stateIdent, limit uint64, bitmapSize int) (*indexReaderWithLimitTag, error) {
+ r, err := newIndexReader(db, state, bitmapSize)
if err != nil {
return nil, err
}
@@ -252,7 +252,7 @@ func (r *historyReader) read(state stateIdentQuery, stateID uint64, lastID uint6
// state retrieval
ir, ok := r.readers[state.String()]
if !ok {
- ir, err = newIndexReaderWithLimitTag(r.disk, state.stateIdent, metadata.Last)
+ ir, err = newIndexReaderWithLimitTag(r.disk, state.stateIdent, metadata.Last, 0)
if err != nil {
return nil, err
}
diff --git a/triedb/pathdb/history_trienode.go b/triedb/pathdb/history_trienode.go
index 1004106af9..6c0c0fe8cc 100644
--- a/triedb/pathdb/history_trienode.go
+++ b/triedb/pathdb/history_trienode.go
@@ -159,17 +159,6 @@ func newTrienodeHistory(root common.Hash, parent common.Hash, block uint64, node
}
}
-// sharedLen returns the length of the common prefix shared by a and b.
-func sharedLen(a, b []byte) int {
- n := min(len(a), len(b))
- for i := range n {
- if a[i] != b[i] {
- return i
- }
- }
- return n
-}
-
// typ implements the history interface, returning the historical data type held.
func (h *trienodeHistory) typ() historyType {
return typeTrienodeHistory
@@ -219,7 +208,7 @@ func (h *trienodeHistory) encode() ([]byte, []byte, []byte, error) {
restarts = append(restarts, internalValOffset)
prefixLen = 0
} else {
- prefixLen = sharedLen(prevKey, key)
+ prefixLen = commonPrefixLen(prevKey, key)
}
value := h.nodes[owner][path]
diff --git a/triedb/pathdb/history_trienode_test.go b/triedb/pathdb/history_trienode_test.go
index be4740a904..0c0422f00f 100644
--- a/triedb/pathdb/history_trienode_test.go
+++ b/triedb/pathdb/history_trienode_test.go
@@ -580,8 +580,8 @@ func TestTrienodeHistoryReaderIterator(t *testing.T) {
}
}
-// TestSharedLen tests the sharedLen helper function
-func TestSharedLen(t *testing.T) {
+// TestCommonPrefixLen tests the commonPrefixLen helper function
+func TestCommonPrefixLen(t *testing.T) {
tests := []struct {
a, b []byte
expected int
@@ -610,13 +610,13 @@ func TestSharedLen(t *testing.T) {
}
for i, test := range tests {
- result := sharedLen(test.a, test.b)
+ result := commonPrefixLen(test.a, test.b)
if result != test.expected {
t.Errorf("Test %d: sharedLen(%q, %q) = %d, expected %d",
i, test.a, test.b, result, test.expected)
}
// Test commutativity
- resultReverse := sharedLen(test.b, test.a)
+ resultReverse := commonPrefixLen(test.b, test.a)
if result != resultReverse {
t.Errorf("Test %d: sharedLen is not commutative: sharedLen(a,b)=%d, sharedLen(b,a)=%d",
i, result, resultReverse)
diff --git a/triedb/pathdb/history_trienode_utils.go b/triedb/pathdb/history_trienode_utils.go
new file mode 100644
index 0000000000..0513343404
--- /dev/null
+++ b/triedb/pathdb/history_trienode_utils.go
@@ -0,0 +1,83 @@
+// Copyright 2025 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package pathdb
+
+import (
+ "encoding/binary"
+ "fmt"
+ "slices"
+)
+
+// commonPrefixLen returns the length of the common prefix shared by a and b.
+func commonPrefixLen(a, b []byte) int {
+ n := min(len(a), len(b))
+ for i := range n {
+ if a[i] != b[i] {
+ return i
+ }
+ }
+ return n
+}
+
+// encodeIDs sorts the given list of uint16 IDs and encodes them into a
+// compact byte slice using variable-length unsigned integer encoding.
+func encodeIDs(ids []uint16) []byte {
+ slices.Sort(ids)
+ buf := make([]byte, 0, len(ids))
+ for _, id := range ids {
+ buf = binary.AppendUvarint(buf, uint64(id))
+ }
+ return buf
+}
+
+// decodeIDs decodes a sequence of variable-length encoded uint16 IDs from the
+// given byte slice and returns them as a set.
+//
+// Returns an error if the input buffer does not contain a complete Uvarint value.
+func decodeIDs(buf []byte) ([]uint16, error) {
+ var res []uint16
+ for len(buf) > 0 {
+ id, n := binary.Uvarint(buf)
+ if n <= 0 {
+ return nil, fmt.Errorf("too short for decoding node id, %v", buf)
+ }
+ buf = buf[n:]
+ res = append(res, uint16(id))
+ }
+ return res, nil
+}
+
+// isAncestor reports whether node x is the ancestor of node y.
+func isAncestor(x, y uint16) bool {
+ for y > x {
+ y = (y - 1) / 16 // parentID(y) = (y - 1) / 16
+ if y == x {
+ return true
+ }
+ }
+ return false
+}
+
+// isBitSet reports whether the bit at `index` in the byte slice `b` is set.
+func isBitSet(b []byte, index int) bool {
+ return b[index/8]&(1<<(7-index%8)) != 0
+}
+
+// setBit sets the bit at `index` in the byte slice `b` to 1.
+func setBit(b []byte, index int) {
+ b[index/8] |= 1 << (7 - index%8)
+}
diff --git a/triedb/pathdb/history_trienode_utils_test.go b/triedb/pathdb/history_trienode_utils_test.go
new file mode 100644
index 0000000000..17eabb2a98
--- /dev/null
+++ b/triedb/pathdb/history_trienode_utils_test.go
@@ -0,0 +1,81 @@
+// Copyright 2025 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package pathdb
+
+import (
+ "bytes"
+ "testing"
+)
+
+func TestIsAncestor(t *testing.T) {
+ suites := []struct {
+ x, y uint16
+ want bool
+ }{
+ {0, 1, true},
+ {0, 16, true},
+ {0, 17, true},
+ {0, 272, true},
+
+ {1, 0, false},
+ {1, 2, false},
+ {1, 17, true},
+ {1, 18, true},
+ {17, 273, true},
+ {1, 1, false},
+ }
+ for _, tc := range suites {
+ result := isAncestor(tc.x, tc.y)
+ if result != tc.want {
+ t.Fatalf("isAncestor(%d, %d) = %v, want %v", tc.x, tc.y, result, tc.want)
+ }
+ }
+}
+
+func TestBitmapSet(t *testing.T) {
+ suites := []struct {
+ index int
+ expect []byte
+ }{
+ {
+ 0, []byte{0b10000000, 0x0},
+ },
+ {
+ 1, []byte{0b01000000, 0x0},
+ },
+ {
+ 7, []byte{0b00000001, 0x0},
+ },
+ {
+ 8, []byte{0b00000000, 0b10000000},
+ },
+ {
+ 15, []byte{0b00000000, 0b00000001},
+ },
+ }
+ for _, tc := range suites {
+ var buf [2]byte
+ setBit(buf[:], tc.index)
+
+ if !bytes.Equal(buf[:], tc.expect) {
+ t.Fatalf("bitmap = %v, want %v", buf, tc.expect)
+ }
+ if !isBitSet(buf[:], tc.index) {
+ t.Fatal("bit is not set")
+ }
+ }
+}