diff --git a/triedb/pathdb/history_index.go b/triedb/pathdb/history_index.go index cc5cd204b4..0c5eb8db21 100644 --- a/triedb/pathdb/history_index.go +++ b/triedb/pathdb/history_index.go @@ -25,22 +25,28 @@ import ( "github.com/ethereum/go-ethereum/ethdb" ) -// parseIndex parses the index data with the supplied byte stream. The index data -// is a list of fixed-sized metadata. Empty metadata is regarded as invalid. -func parseIndex(blob []byte) ([]*indexBlockDesc, error) { +// parseIndex parses the index data from the provided byte stream. The index data +// is a sequence of fixed-size metadata entries, and any empty metadata entry is +// considered invalid. +// +// Each metadata entry consists of two components: the indexBlockDesc and an +// optional extension bitmap. The bitmap length may vary across different categories, +// but must remain consistent within the same category. +func parseIndex(blob []byte, bitmapSize int) ([]*indexBlockDesc, error) { if len(blob) == 0 { return nil, errors.New("empty state history index") } - if len(blob)%indexBlockDescSize != 0 { - return nil, fmt.Errorf("corrupted state index, len: %d", len(blob)) + size := indexBlockDescSize + bitmapSize + if len(blob)%size != 0 { + return nil, fmt.Errorf("corrupted state index, len: %d, bitmap size: %d", len(blob), bitmapSize) } var ( lastID uint32 descList []*indexBlockDesc ) - for i := 0; i < len(blob)/indexBlockDescSize; i++ { + for i := 0; i < len(blob)/size; i++ { var desc indexBlockDesc - desc.decode(blob[i*indexBlockDescSize : (i+1)*indexBlockDescSize]) + desc.decode(blob[i*size : (i+1)*size]) if desc.empty() { return nil, errors.New("empty state history index block") } @@ -69,33 +75,35 @@ func parseIndex(blob []byte) ([]*indexBlockDesc, error) { // indexReader is the structure to look up the state history index records // associated with the specific state element. 
type indexReader struct { - db ethdb.KeyValueReader - descList []*indexBlockDesc - readers map[uint32]*blockReader - state stateIdent + db ethdb.KeyValueReader + descList []*indexBlockDesc + readers map[uint32]*blockReader + state stateIdent + bitmapSize int } // loadIndexData loads the index data associated with the specified state. -func loadIndexData(db ethdb.KeyValueReader, state stateIdent) ([]*indexBlockDesc, error) { +func loadIndexData(db ethdb.KeyValueReader, state stateIdent, bitmapSize int) ([]*indexBlockDesc, error) { blob := readStateIndex(state, db) if len(blob) == 0 { return nil, nil } - return parseIndex(blob) + return parseIndex(blob, bitmapSize) } // newIndexReader constructs a index reader for the specified state. Reader with // empty data is allowed. -func newIndexReader(db ethdb.KeyValueReader, state stateIdent) (*indexReader, error) { - descList, err := loadIndexData(db, state) +func newIndexReader(db ethdb.KeyValueReader, state stateIdent, bitmapSize int) (*indexReader, error) { + descList, err := loadIndexData(db, state, bitmapSize) if err != nil { return nil, err } return &indexReader{ - descList: descList, - readers: make(map[uint32]*blockReader), - db: db, - state: state, + descList: descList, + readers: make(map[uint32]*blockReader), + db: db, + state: state, + bitmapSize: bitmapSize, }, nil } @@ -106,11 +114,9 @@ func (r *indexReader) refresh() error { // may have been modified by additional elements written to the disk. if len(r.descList) != 0 { last := r.descList[len(r.descList)-1] - if !last.full() { - delete(r.readers, last.id) - } + delete(r.readers, last.id) } - descList, err := loadIndexData(r.db, r.state) + descList, err := loadIndexData(r.db, r.state, r.bitmapSize) if err != nil { return err } @@ -118,26 +124,10 @@ func (r *indexReader) refresh() error { return nil } -// newIterator creates an iterator for traversing the index entries. 
-func (r *indexReader) newIterator() *indexIterator { - return newIndexIterator(r.descList, func(id uint32) (*blockReader, error) { - br, ok := r.readers[id] - if !ok { - var err error - br, err = newBlockReader(readStateIndexBlock(r.state, r.db, id)) - if err != nil { - return nil, err - } - r.readers[id] = br - } - return br, nil - }) -} - // readGreaterThan locates the first element that is greater than the specified // id. If no such element is found, MaxUint64 is returned. func (r *indexReader) readGreaterThan(id uint64) (uint64, error) { - it := r.newIterator() + it := r.newIterator(nil) found := it.SeekGT(id) if err := it.Error(); err != nil { return 0, err @@ -155,31 +145,33 @@ func (r *indexReader) readGreaterThan(id uint64) (uint64, error) { // history ids) is stored in these second-layer index blocks, which are size // limited. type indexWriter struct { - descList []*indexBlockDesc // The list of index block descriptions - bw *blockWriter // The live index block writer - frozen []*blockWriter // The finalized index block writers, waiting for flush - lastID uint64 // The ID of the latest tracked history - state stateIdent - db ethdb.KeyValueReader + descList []*indexBlockDesc // The list of index block descriptions + bw *blockWriter // The live index block writer + frozen []*blockWriter // The finalized index block writers, waiting for flush + lastID uint64 // The ID of the latest tracked history + state stateIdent // The identifier of the state being indexed + bitmapSize int // The size of optional extension bitmap + db ethdb.KeyValueReader } // newIndexWriter constructs the index writer for the specified state. Additionally, // it takes an integer as the limit and prunes all existing elements above that ID. // It's essential as the recovery mechanism after unclean shutdown during the history // indexing. 
-func newIndexWriter(db ethdb.KeyValueReader, state stateIdent, limit uint64) (*indexWriter, error) { +func newIndexWriter(db ethdb.KeyValueReader, state stateIdent, limit uint64, bitmapSize int) (*indexWriter, error) { blob := readStateIndex(state, db) if len(blob) == 0 { - desc := newIndexBlockDesc(0) - bw, _ := newBlockWriter(nil, desc, 0 /* useless if the block is empty */) + desc := newIndexBlockDesc(0, bitmapSize) + bw, _ := newBlockWriter(nil, desc, 0 /* useless if the block is empty */, bitmapSize != 0) return &indexWriter{ - descList: []*indexBlockDesc{desc}, - bw: bw, - state: state, - db: db, + descList: []*indexBlockDesc{desc}, + bw: bw, + state: state, + db: db, + bitmapSize: bitmapSize, }, nil } - descList, err := parseIndex(blob) + descList, err := parseIndex(blob, bitmapSize) if err != nil { return nil, err } @@ -197,30 +189,31 @@ func newIndexWriter(db ethdb.KeyValueReader, state stateIdent, limit uint64) (*i // Construct the writer for the last block. All elements in this block // that exceed the limit will be truncated. - bw, err := newBlockWriter(indexBlock, lastDesc, limit) + bw, err := newBlockWriter(indexBlock, lastDesc, limit, bitmapSize != 0) if err != nil { return nil, err } return &indexWriter{ - descList: descList, - lastID: bw.last(), - bw: bw, - state: state, - db: db, + descList: descList, + lastID: bw.last(), + bw: bw, + state: state, + db: db, + bitmapSize: bitmapSize, }, nil } // append adds the new element into the index writer. 
-func (w *indexWriter) append(id uint64) error { +func (w *indexWriter) append(id uint64, ext []uint16) error { if id <= w.lastID { return fmt.Errorf("append element out of order, last: %d, this: %d", w.lastID, id) } - if w.bw.full() { + if w.bw.estimateFull(ext) { if err := w.rotate(); err != nil { return err } } - if err := w.bw.append(id); err != nil { + if err := w.bw.append(id, ext); err != nil { return err } w.lastID = id @@ -233,10 +226,10 @@ func (w *indexWriter) append(id uint64) error { func (w *indexWriter) rotate() error { var ( err error - desc = newIndexBlockDesc(w.bw.desc.id + 1) + desc = newIndexBlockDesc(w.bw.desc.id+1, w.bitmapSize) ) w.frozen = append(w.frozen, w.bw) - w.bw, err = newBlockWriter(nil, desc, 0 /* useless if the block is empty */) + w.bw, err = newBlockWriter(nil, desc, 0 /* useless if the block is empty */, w.bitmapSize != 0) if err != nil { return err } @@ -268,7 +261,8 @@ func (w *indexWriter) finish(batch ethdb.Batch) { } w.frozen = nil // release all the frozen writers - buf := make([]byte, 0, indexBlockDescSize*len(descList)) + size := indexBlockDescSize + w.bitmapSize + buf := make([]byte, 0, size*len(descList)) for _, desc := range descList { buf = append(buf, desc.encode()...) } @@ -277,30 +271,32 @@ func (w *indexWriter) finish(batch ethdb.Batch) { // indexDeleter is responsible for deleting index data for a specific state. 
type indexDeleter struct { - descList []*indexBlockDesc // The list of index block descriptions - bw *blockWriter // The live index block writer - dropped []uint32 // The list of index block id waiting for deleting - lastID uint64 // The ID of the latest tracked history - state stateIdent - db ethdb.KeyValueReader + descList []*indexBlockDesc // The list of index block descriptions + bw *blockWriter // The live index block writer + dropped []uint32 // The list of index block id waiting for deleting + lastID uint64 // The ID of the latest tracked history + state stateIdent // The identifier of the state being indexed + bitmapSize int // The size of optional extension bitmap + db ethdb.KeyValueReader } // newIndexDeleter constructs the index deleter for the specified state. -func newIndexDeleter(db ethdb.KeyValueReader, state stateIdent, limit uint64) (*indexDeleter, error) { +func newIndexDeleter(db ethdb.KeyValueReader, state stateIdent, limit uint64, bitmapSize int) (*indexDeleter, error) { blob := readStateIndex(state, db) if len(blob) == 0 { // TODO(rjl493456442) we can probably return an error here, // deleter with no data is meaningless. - desc := newIndexBlockDesc(0) - bw, _ := newBlockWriter(nil, desc, 0 /* useless if the block is empty */) + desc := newIndexBlockDesc(0, bitmapSize) + bw, _ := newBlockWriter(nil, desc, 0 /* useless if the block is empty */, bitmapSize != 0) return &indexDeleter{ - descList: []*indexBlockDesc{desc}, - bw: bw, - state: state, - db: db, + descList: []*indexBlockDesc{desc}, + bw: bw, + state: state, + bitmapSize: bitmapSize, + db: db, }, nil } - descList, err := parseIndex(blob) + descList, err := parseIndex(blob, bitmapSize) if err != nil { return nil, err } @@ -318,16 +314,17 @@ func newIndexDeleter(db ethdb.KeyValueReader, state stateIdent, limit uint64) (* // Construct the writer for the last block. All elements in this block // that exceed the limit will be truncated. 
- bw, err := newBlockWriter(indexBlock, lastDesc, limit) + bw, err := newBlockWriter(indexBlock, lastDesc, limit, bitmapSize != 0) if err != nil { return nil, err } return &indexDeleter{ - descList: descList, - lastID: bw.last(), - bw: bw, - state: state, - db: db, + descList: descList, + lastID: bw.last(), + bw: bw, + state: state, + bitmapSize: bitmapSize, + db: db, }, nil } @@ -364,7 +361,7 @@ func (d *indexDeleter) pop(id uint64) error { // Open the previous block writer for deleting lastDesc := d.descList[len(d.descList)-1] indexBlock := readStateIndexBlock(d.state, d.db, lastDesc.id) - bw, err := newBlockWriter(indexBlock, lastDesc, lastDesc.max) + bw, err := newBlockWriter(indexBlock, lastDesc, lastDesc.max, d.bitmapSize != 0) if err != nil { return err } @@ -390,7 +387,8 @@ func (d *indexDeleter) finish(batch ethdb.Batch) { if d.empty() { deleteStateIndex(d.state, batch) } else { - buf := make([]byte, 0, indexBlockDescSize*len(d.descList)) + size := indexBlockDescSize + d.bitmapSize + buf := make([]byte, 0, size*len(d.descList)) for _, desc := range d.descList { buf = append(buf, desc.encode()...) 
} diff --git a/triedb/pathdb/history_index_block.go b/triedb/pathdb/history_index_block.go index 13f16b4cf3..fd43d81b78 100644 --- a/triedb/pathdb/history_index_block.go +++ b/triedb/pathdb/history_index_block.go @@ -17,6 +17,7 @@ package pathdb import ( + "bytes" "encoding/binary" "errors" "fmt" @@ -26,23 +27,27 @@ import ( ) const ( - indexBlockDescSize = 14 // The size of index block descriptor - indexBlockEntriesCap = 4096 // The maximum number of entries can be grouped in a block - indexBlockRestartLen = 256 // The restart interval length of index block - historyIndexBatch = 8 * 1024 * 1024 // The number of state history indexes for constructing or deleting as batch + indexBlockDescSize = 14 // The size of index block descriptor + indexBlockMaxSize = 4096 // The maximum size of a single index block + indexBlockRestartLen = 256 // The restart interval length of index block ) // indexBlockDesc represents a descriptor for an index block, which contains a // list of state mutation records associated with a specific state (either an // account or a storage slot). type indexBlockDesc struct { - max uint64 // The maximum state ID retained within the block - entries uint16 // The number of state mutation records retained within the block - id uint32 // The id of the index block + max uint64 // The maximum state ID retained within the block + entries uint16 // The number of state mutation records retained within the block + id uint32 // The id of the index block + extBitmap []byte // Optional fixed-size bitmap for the included extension elements } -func newIndexBlockDesc(id uint32) *indexBlockDesc { - return &indexBlockDesc{id: id} +func newIndexBlockDesc(id uint32, bitmapSize int) *indexBlockDesc { + var bitmap []byte + if bitmapSize > 0 { + bitmap = make([]byte, bitmapSize) + } + return &indexBlockDesc{id: id, extBitmap: bitmap} } // empty indicates whether the block is empty with no element retained. 
@@ -50,26 +55,33 @@ func (d *indexBlockDesc) empty() bool { return d.entries == 0 } -// full indicates whether the number of elements in the block exceeds the -// preconfigured limit. -func (d *indexBlockDesc) full() bool { - return d.entries >= indexBlockEntriesCap -} - // encode packs index block descriptor into byte stream. func (d *indexBlockDesc) encode() []byte { - var buf [indexBlockDescSize]byte + buf := make([]byte, indexBlockDescSize+len(d.extBitmap)) binary.BigEndian.PutUint64(buf[0:8], d.max) binary.BigEndian.PutUint16(buf[8:10], d.entries) binary.BigEndian.PutUint32(buf[10:14], d.id) + copy(buf[indexBlockDescSize:], d.extBitmap) return buf[:] } -// decode unpacks index block descriptor from byte stream. +// decode unpacks index block descriptor from byte stream. It's safe to mutate +// the provided byte stream after the function call. func (d *indexBlockDesc) decode(blob []byte) { d.max = binary.BigEndian.Uint64(blob[:8]) d.entries = binary.BigEndian.Uint16(blob[8:10]) d.id = binary.BigEndian.Uint32(blob[10:14]) + d.extBitmap = bytes.Clone(blob[indexBlockDescSize:]) +} + +// copy returns a deep-copied object. +func (d *indexBlockDesc) copy() *indexBlockDesc { + return &indexBlockDesc{ + max: d.max, + entries: d.entries, + id: d.id, + extBitmap: bytes.Clone(d.extBitmap), + } } // parseIndexBlock parses the index block with the supplied byte stream. @@ -97,20 +109,38 @@ func (d *indexBlockDesc) decode(blob []byte) { // A uint16 can cover offsets in the range [0, 65536), which is more than enough // to store 4096 integers. // -// Each chunk begins with the full value of the first integer, followed by -// subsequent integers representing the differences between the current value -// and the preceding one. Integers are encoded with variable-size for best -// storage efficiency. Each chunk can be illustrated as below. 
+// Each chunk begins with a full integer value for the first element, followed +// by subsequent integers encoded as differences (deltas) from their preceding +// values. All integers use variable-length encoding for optimal space efficiency. // -// Restart ---> +----------------+ -// | Full integer | -// +----------------+ -// | Diff with prev | -// +----------------+ -// | ... | -// +----------------+ -// | Diff with prev | -// +----------------+ +// In the updated format, each element in the chunk may optionally include an +// "extension" section. If an extension is present, it starts with a var-size +// integer indicating the length of the remaining extension payload, followed by +// that many bytes. If no extension is present, the element format is identical +// to the original version (i.e., only the integer or delta value is encoded). +// +// In the trienode history index, the extension field contains the list of +// trie node IDs that fall within this range. For the given state transition, +// these IDs represent the specific nodes in this range that were mutated. +// +// Whether an element includes an extension is determined by the block reader +// based on the specification. Conceptually, a chunk is structured as: +// +// Restart ---> +----------------+ +// | Full integer | +// +----------------+ +// | (Extension?) | +// +----------------+ +// | Diff with prev | +// +----------------+ +// | (Extension?) | +// +----------------+ +// | ... | +// +----------------+ +// | Diff with prev | +// +----------------+ +// | (Extension?) | +// +----------------+ // // Empty index block is regarded as invalid. func parseIndexBlock(blob []byte) ([]uint16, []byte, error) { @@ -148,24 +178,26 @@ func parseIndexBlock(blob []byte) ([]uint16, []byte, error) { type blockReader struct { restarts []uint16 data []byte + hasExt bool } // newBlockReader constructs the block reader with the supplied block data. 
-func newBlockReader(blob []byte) (*blockReader, error) { +func newBlockReader(blob []byte, hasExt bool) (*blockReader, error) { restarts, data, err := parseIndexBlock(blob) if err != nil { return nil, err } return &blockReader{ restarts: restarts, - data: data, // safe to own the slice + data: data, // safe to own the slice + hasExt: hasExt, // flag whether extension should be resolved }, nil } // readGreaterThan locates the first element in the block that is greater than // the specified value. If no such element is found, MaxUint64 is returned. func (br *blockReader) readGreaterThan(id uint64) (uint64, error) { - it := newBlockIterator(br.data, br.restarts) + it := br.newIterator(nil) found := it.SeekGT(id) if err := it.Error(); err != nil { return 0, err @@ -180,17 +212,19 @@ type blockWriter struct { desc *indexBlockDesc // Descriptor of the block restarts []uint16 // Offsets into the data slice, marking the start of each section data []byte // Aggregated encoded data slice + hasExt bool // Flag whether the extension field for each element exists } // newBlockWriter constructs a block writer. In addition to the existing data // and block description, it takes an element ID and prunes all existing elements // above that ID. It's essential as the recovery mechanism after unclean shutdown // during the history indexing. 
-func newBlockWriter(blob []byte, desc *indexBlockDesc, limit uint64) (*blockWriter, error) { +func newBlockWriter(blob []byte, desc *indexBlockDesc, limit uint64, hasExt bool) (*blockWriter, error) { if len(blob) == 0 { return &blockWriter{ - desc: desc, - data: make([]byte, 0, 1024), + desc: desc, + data: make([]byte, 0, 1024), + hasExt: hasExt, }, nil } restarts, data, err := parseIndexBlock(blob) @@ -201,6 +235,7 @@ func newBlockWriter(blob []byte, desc *indexBlockDesc, limit uint64) (*blockWrit desc: desc, restarts: restarts, data: data, // safe to own the slice + hasExt: hasExt, } var trimmed int for !writer.empty() && writer.last() > limit { @@ -215,9 +250,26 @@ func newBlockWriter(blob []byte, desc *indexBlockDesc, limit uint64) (*blockWrit return writer, nil } +// setBitmap applies the given extension elements into the bitmap. +func (b *blockWriter) setBitmap(ext []uint16) { + for _, n := range ext { + // Node ID zero is intentionally filtered out. Any element in this range + // can indicate that the sub-tree's root node was mutated, so storing zero + // is redundant and saves one byte for bitmap. + if n != 0 { + setBit(b.desc.extBitmap, int(n-1)) + } + } +} + // append adds a new element to the block. The new element must be greater than // the previous one. The provided ID is assumed to always be greater than 0. -func (b *blockWriter) append(id uint64) error { +// +// ext refers to the optional extension field attached to the appended element. +// This extension mechanism is used by trie-node history and represents a list of +// trie node IDs that fall within the range covered by the index element +// (typically corresponding to a sub-trie in trie-node history). +func (b *blockWriter) append(id uint64, ext []uint16) error { if id == 0 { return errors.New("invalid zero id") } @@ -244,13 +296,29 @@ func (b *blockWriter) append(id uint64) error { // element. 
b.data = binary.AppendUvarint(b.data, id-b.desc.max) } + // Extension validation + if (len(ext) == 0) != !b.hasExt { + if len(ext) == 0 { + return errors.New("missing extension") + } + return errors.New("unexpected extension") + } + // Append the extension if it is not nil. The extension is prefixed with a + // length indicator, and the block reader MUST understand this scheme and + // decode the extension accordingly. + if len(ext) > 0 { + b.setBitmap(ext) + enc := encodeIDs(ext) + b.data = binary.AppendUvarint(b.data, uint64(len(enc))) + b.data = append(b.data, enc...) + } b.desc.entries++ b.desc.max = id return nil } // scanSection traverses the specified section and terminates if fn returns true. -func (b *blockWriter) scanSection(section int, fn func(uint64, int) bool) { +func (b *blockWriter) scanSection(section int, fn func(uint64, int, []uint16) bool) error { var ( value uint64 start = int(b.restarts[section]) @@ -269,28 +337,47 @@ func (b *blockWriter) scanSection(section int, fn func(uint64, int) bool) { } else { value += x } - if fn(value, pos) { - return + // Resolve the extension if exists + var ( + err error + ext []uint16 + extLen int + ) + if b.hasExt { + l, ln := binary.Uvarint(b.data[pos+n:]) + extLen = ln + int(l) + ext, err = decodeIDs(b.data[pos+n+ln : pos+n+extLen]) } + if err != nil { + return err + } + if fn(value, pos, ext) { + return nil + } + // Shift to next position pos += n + pos += extLen } + return nil } // sectionLast returns the last element in the specified section. 
-func (b *blockWriter) sectionLast(section int) uint64 { +func (b *blockWriter) sectionLast(section int) (uint64, error) { var n uint64 - b.scanSection(section, func(v uint64, _ int) bool { + if err := b.scanSection(section, func(v uint64, _ int, _ []uint16) bool { n = v return false - }) - return n + }); err != nil { + return 0, err + } + return n, nil } // sectionSearch looks up the specified value in the given section, // the position and the preceding value will be returned if found. // It assumes that the preceding element exists in the section. -func (b *blockWriter) sectionSearch(section int, n uint64) (found bool, prev uint64, pos int) { - b.scanSection(section, func(v uint64, p int) bool { +func (b *blockWriter) sectionSearch(section int, n uint64) (found bool, prev uint64, pos int, err error) { + if err := b.scanSection(section, func(v uint64, p int, _ []uint16) bool { if n == v { pos = p found = true @@ -298,8 +385,24 @@ func (b *blockWriter) sectionSearch(section int, n uint64) (found bool, prev uin } prev = v return false // continue iteration - }) - return found, prev, pos + }); err != nil { + return false, 0, 0, err + } + return found, prev, pos, nil +} + +// rebuildBitmap scans the entire block and rebuilds the bitmap. +func (b *blockWriter) rebuildBitmap() error { + clear(b.desc.extBitmap) + for i := 0; i < len(b.restarts); i++ { + if err := b.scanSection(i, func(v uint64, p int, ext []uint16) bool { + b.setBitmap(ext) + return false // continue iteration + }); err != nil { + return err + } + } + return nil } // pop removes the last element from the block. 
The assumption is held that block @@ -315,6 +418,7 @@ func (b *blockWriter) pop(id uint64) error { if b.desc.entries == 1 { b.desc.max = 0 b.desc.entries = 0 + clear(b.desc.extBitmap) b.restarts = nil b.data = b.data[:0] return nil @@ -324,28 +428,36 @@ func (b *blockWriter) pop(id uint64) error { if b.desc.entries%indexBlockRestartLen == 1 { b.data = b.data[:b.restarts[len(b.restarts)-1]] b.restarts = b.restarts[:len(b.restarts)-1] - b.desc.max = b.sectionLast(len(b.restarts) - 1) + last, err := b.sectionLast(len(b.restarts) - 1) + if err != nil { + return err + } + b.desc.max = last b.desc.entries -= 1 - return nil + return b.rebuildBitmap() } // Look up the element preceding the one to be popped, in order to update // the maximum element in the block. - found, prev, pos := b.sectionSearch(len(b.restarts)-1, id) + found, prev, pos, err := b.sectionSearch(len(b.restarts)-1, id) + if err != nil { + return err + } if !found { return fmt.Errorf("pop element is not found, last: %d, this: %d", b.desc.max, id) } b.desc.max = prev b.data = b.data[:pos] b.desc.entries -= 1 - return nil + return b.rebuildBitmap() } func (b *blockWriter) empty() bool { return b.desc.empty() } -func (b *blockWriter) full() bool { - return b.desc.full() +func (b *blockWriter) estimateFull(ext []uint16) bool { + size := 8 + 2*len(ext) + return len(b.data)+size > indexBlockMaxSize } // last returns the last element in the block. 
It should only be called when diff --git a/triedb/pathdb/history_index_block_test.go b/triedb/pathdb/history_index_block_test.go index f8c6d3ab87..923ae29348 100644 --- a/triedb/pathdb/history_index_block_test.go +++ b/triedb/pathdb/history_index_block_test.go @@ -17,6 +17,7 @@ package pathdb import ( + "bytes" "math" "math/rand" "slices" @@ -24,16 +25,36 @@ import ( "testing" ) +func randomExt(bitmapSize int, n int) []uint16 { + if bitmapSize == 0 { + return nil + } + var ( + limit = bitmapSize * 8 + extList []uint16 + ) + for i := 0; i < n; i++ { + extList = append(extList, uint16(rand.Intn(limit+1))) + } + return extList +} + func TestBlockReaderBasic(t *testing.T) { + testBlockReaderBasic(t, 0) + testBlockReaderBasic(t, 2) + testBlockReaderBasic(t, 34) +} + +func testBlockReaderBasic(t *testing.T, bitmapSize int) { elements := []uint64{ 1, 5, 10, 11, 20, } - bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0) + bw, _ := newBlockWriter(nil, newIndexBlockDesc(0, bitmapSize), 0, bitmapSize != 0) for i := 0; i < len(elements); i++ { - bw.append(elements[i]) + bw.append(elements[i], randomExt(bitmapSize, 5)) } - br, err := newBlockReader(bw.finish()) + br, err := newBlockReader(bw.finish(), bitmapSize != 0) if err != nil { t.Fatalf("Failed to construct the block reader, %v", err) } @@ -60,18 +81,24 @@ func TestBlockReaderBasic(t *testing.T) { } func TestBlockReaderLarge(t *testing.T) { + testBlockReaderLarge(t, 0) + testBlockReaderLarge(t, 2) + testBlockReaderLarge(t, 34) +} + +func testBlockReaderLarge(t *testing.T, bitmapSize int) { var elements []uint64 for i := 0; i < 1000; i++ { elements = append(elements, rand.Uint64()) } slices.Sort(elements) - bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0) + bw, _ := newBlockWriter(nil, newIndexBlockDesc(0, bitmapSize), 0, bitmapSize != 0) for i := 0; i < len(elements); i++ { - bw.append(elements[i]) + bw.append(elements[i], randomExt(bitmapSize, 5)) } - br, err := newBlockReader(bw.finish()) + br, err := 
newBlockReader(bw.finish(), bitmapSize != 0) if err != nil { t.Fatalf("Failed to construct the block reader, %v", err) } @@ -95,26 +122,32 @@ func TestBlockReaderLarge(t *testing.T) { } func TestBlockWriterBasic(t *testing.T) { - bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0) + testBlockWriteBasic(t, 0) + testBlockWriteBasic(t, 2) + testBlockWriteBasic(t, 34) +} + +func testBlockWriteBasic(t *testing.T, bitmapSize int) { + bw, _ := newBlockWriter(nil, newIndexBlockDesc(0, bitmapSize), 0, bitmapSize != 0) if !bw.empty() { t.Fatal("expected empty block") } - bw.append(2) - if err := bw.append(1); err == nil { + bw.append(2, randomExt(bitmapSize, 5)) + if err := bw.append(1, randomExt(bitmapSize, 5)); err == nil { t.Fatal("out-of-order insertion is not expected") } var maxElem uint64 for i := 0; i < 10; i++ { - bw.append(uint64(i + 3)) + bw.append(uint64(i+3), randomExt(bitmapSize, 5)) maxElem = uint64(i + 3) } - bw, err := newBlockWriter(bw.finish(), newIndexBlockDesc(0), maxElem) + bw, err := newBlockWriter(bw.finish(), newIndexBlockDesc(0, bitmapSize), maxElem, bitmapSize != 0) if err != nil { t.Fatalf("Failed to construct the block writer, %v", err) } for i := 0; i < 10; i++ { - if err := bw.append(uint64(i + 100)); err != nil { + if err := bw.append(uint64(i+100), randomExt(bitmapSize, 5)); err != nil { t.Fatalf("Failed to append value %d: %v", i, err) } } @@ -122,58 +155,38 @@ func TestBlockWriterBasic(t *testing.T) { } func TestBlockWriterWithLimit(t *testing.T) { - bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0) + testBlockWriterWithLimit(t, 0) + testBlockWriterWithLimit(t, 2) + testBlockWriterWithLimit(t, 34) +} - var maxElem uint64 - for i := 0; i < indexBlockRestartLen*2; i++ { - bw.append(uint64(i + 1)) - maxElem = uint64(i + 1) - } +func testBlockWriterWithLimit(t *testing.T, bitmapSize int) { + bw, _ := newBlockWriter(nil, newIndexBlockDesc(0, bitmapSize), 0, bitmapSize != 0) - suites := []struct { - limit uint64 - expMax uint64 - }{ - // 
nothing to truncate - { - maxElem, maxElem, - }, - // truncate the last element - { - maxElem - 1, maxElem - 1, - }, - // truncation around the restart boundary - { - uint64(indexBlockRestartLen + 1), - uint64(indexBlockRestartLen + 1), - }, - // truncation around the restart boundary - { - uint64(indexBlockRestartLen), - uint64(indexBlockRestartLen), - }, - { - uint64(1), uint64(1), - }, - // truncate the entire block, it's in theory invalid - { - uint64(0), uint64(0), - }, + var bitmaps [][]byte + for i := 0; i < indexBlockRestartLen+2; i++ { + bw.append(uint64(i+1), randomExt(bitmapSize, 5)) + bitmaps = append(bitmaps, bytes.Clone(bw.desc.extBitmap)) } - for i, suite := range suites { - desc := *bw.desc - block, err := newBlockWriter(bw.finish(), &desc, suite.limit) + for i := 0; i < indexBlockRestartLen+2; i++ { + limit := uint64(i + 1) + + desc := bw.desc.copy() + block, err := newBlockWriter(bytes.Clone(bw.finish()), desc, limit, bitmapSize != 0) if err != nil { t.Fatalf("Failed to construct the block writer, %v", err) } - if block.desc.max != suite.expMax { - t.Fatalf("Test %d, unexpected max value, got %d, want %d", i, block.desc.max, suite.expMax) + if block.desc.max != limit { + t.Fatalf("Test %d, unexpected max value, got %d, want %d", i, block.desc.max, limit) + } + if !bytes.Equal(desc.extBitmap, bitmaps[i]) { + t.Fatalf("Test %d, unexpected bitmap, got: %v, want: %v", i, block.desc.extBitmap, bitmaps[i]) } // Re-fill the elements var maxElem uint64 - for elem := suite.limit + 1; elem < indexBlockRestartLen*4; elem++ { - if err := block.append(elem); err != nil { + for elem := limit + 1; elem < indexBlockRestartLen+4; elem++ { + if err := block.append(elem, randomExt(bitmapSize, 5)); err != nil { t.Fatalf("Failed to append value %d: %v", elem, err) } maxElem = elem @@ -185,9 +198,15 @@ func TestBlockWriterWithLimit(t *testing.T) { } func TestBlockWriterDelete(t *testing.T) { - bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0) + 
testBlockWriterDelete(t, 0) + testBlockWriterDelete(t, 2) + testBlockWriterDelete(t, 34) +} + +func testBlockWriterDelete(t *testing.T, bitmapSize int) { + bw, _ := newBlockWriter(nil, newIndexBlockDesc(0, bitmapSize), 0, bitmapSize != 0) for i := 0; i < 10; i++ { - bw.append(uint64(i + 1)) + bw.append(uint64(i+1), randomExt(bitmapSize, 5)) } // Pop unknown id, the request should be rejected if err := bw.pop(100); err == nil { @@ -209,12 +228,18 @@ func TestBlockWriterDelete(t *testing.T) { } func TestBlcokWriterDeleteWithData(t *testing.T) { + testBlcokWriterDeleteWithData(t, 0) + testBlcokWriterDeleteWithData(t, 2) + testBlcokWriterDeleteWithData(t, 34) +} + +func testBlcokWriterDeleteWithData(t *testing.T, bitmapSize int) { elements := []uint64{ 1, 5, 10, 11, 20, } - bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0) + bw, _ := newBlockWriter(nil, newIndexBlockDesc(0, bitmapSize), 0, bitmapSize != 0) for i := 0; i < len(elements); i++ { - bw.append(elements[i]) + bw.append(elements[i], randomExt(bitmapSize, 5)) } // Re-construct the block writer with data @@ -223,7 +248,10 @@ func TestBlcokWriterDeleteWithData(t *testing.T) { max: 20, entries: 5, } - bw, err := newBlockWriter(bw.finish(), desc, elements[len(elements)-1]) + if bitmapSize > 0 { + desc.extBitmap = make([]byte, bitmapSize) + } + bw, err := newBlockWriter(bw.finish(), desc, elements[len(elements)-1], bitmapSize != 0) if err != nil { t.Fatalf("Failed to construct block writer %v", err) } @@ -234,7 +262,7 @@ func TestBlcokWriterDeleteWithData(t *testing.T) { newTail := elements[i-1] // Ensure the element can still be queried with no issue - br, err := newBlockReader(bw.finish()) + br, err := newBlockReader(bw.finish(), bitmapSize != 0) if err != nil { t.Fatalf("Failed to construct the block reader, %v", err) } @@ -266,29 +294,60 @@ func TestBlcokWriterDeleteWithData(t *testing.T) { } func TestCorruptedIndexBlock(t *testing.T) { - bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0) + bw, _ := 
newBlockWriter(nil, newIndexBlockDesc(0, 0), 0, false) var maxElem uint64 for i := 0; i < 10; i++ { - bw.append(uint64(i + 1)) + bw.append(uint64(i+1), nil) maxElem = uint64(i + 1) } buf := bw.finish() // Mutate the buffer manually buf[len(buf)-1]++ - _, err := newBlockWriter(buf, newIndexBlockDesc(0), maxElem) + _, err := newBlockWriter(buf, newIndexBlockDesc(0, 0), maxElem, false) if err == nil { t.Fatal("Corrupted index block data is not detected") } } // BenchmarkParseIndexBlock benchmarks the performance of parseIndexBlock. +// +// goos: darwin +// goarch: arm64 +// pkg: github.com/ethereum/go-ethereum/triedb/pathdb +// cpu: Apple M1 Pro +// BenchmarkParseIndexBlock +// BenchmarkParseIndexBlock-8 35829495 34.16 ns/op func BenchmarkParseIndexBlock(b *testing.B) { // Generate a realistic index block blob - bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0) + bw, _ := newBlockWriter(nil, newIndexBlockDesc(0, 0), 0, false) for i := 0; i < 4096; i++ { - bw.append(uint64(i * 2)) + bw.append(uint64(i*2), nil) + } + blob := bw.finish() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _, err := parseIndexBlock(blob) + if err != nil { + b.Fatalf("parseIndexBlock failed: %v", err) + } + } +} + +// goos: darwin +// goarch: arm64 +// pkg: github.com/ethereum/go-ethereum/triedb/pathdb +// cpu: Apple M1 Pro +// BenchmarkParseIndexBlockWithExt +// BenchmarkParseIndexBlockWithExt-8 35773242 33.72 ns/op +func BenchmarkParseIndexBlockWithExt(b *testing.B) { + // Generate a realistic index block blob + bw, _ := newBlockWriter(nil, newIndexBlockDesc(0, 34), 0, true) + for i := 0; i < 4096; i++ { + id, ext := uint64(i*2), randomExt(34, 3) + bw.append(id, ext) } blob := bw.finish() @@ -302,21 +361,58 @@ func BenchmarkParseIndexBlock(b *testing.B) { } // BenchmarkBlockWriterAppend benchmarks the performance of indexblock.writer +// +// goos: darwin +// goarch: arm64 +// pkg: github.com/ethereum/go-ethereum/triedb/pathdb +// cpu: Apple M1 Pro +// BenchmarkBlockWriterAppend +// 
BenchmarkBlockWriterAppend-8 293611083 4.113 ns/op 3 B/op 0 allocs/op func BenchmarkBlockWriterAppend(b *testing.B) { b.ReportAllocs() b.ResetTimer() var blockID uint32 - desc := newIndexBlockDesc(blockID) - writer, _ := newBlockWriter(nil, desc, 0) + desc := newIndexBlockDesc(blockID, 0) + writer, _ := newBlockWriter(nil, desc, 0, false) for i := 0; i < b.N; i++ { - if writer.full() { + if writer.estimateFull(nil) { blockID += 1 - desc = newIndexBlockDesc(blockID) - writer, _ = newBlockWriter(nil, desc, 0) + desc = newIndexBlockDesc(blockID, 0) + writer, _ = newBlockWriter(nil, desc, 0, false) } - if err := writer.append(writer.desc.max + 1); err != nil { + if err := writer.append(writer.desc.max+1, nil); err != nil { + b.Error(err) + } + } +} + +// goos: darwin +// goarch: arm64 +// pkg: github.com/ethereum/go-ethereum/triedb/pathdb +// cpu: Apple M1 Pro +// BenchmarkBlockWriterAppendWithExt +// BenchmarkBlockWriterAppendWithExt-8 11123844 103.6 ns/op 42 B/op 2 allocs/op +func BenchmarkBlockWriterAppendWithExt(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + + var ( + bitmapSize = 34 + blockID uint32 + ) + desc := newIndexBlockDesc(blockID, bitmapSize) + writer, _ := newBlockWriter(nil, desc, 0, true) + + for i := 0; i < b.N; i++ { + ext := randomExt(bitmapSize, 3) + if writer.estimateFull(ext) { + blockID += 1 + desc = newIndexBlockDesc(blockID, bitmapSize) + writer, _ = newBlockWriter(nil, desc, 0, true) + } + if err := writer.append(writer.desc.max+1, ext); err != nil { b.Error(err) } } diff --git a/triedb/pathdb/history_index_iterator.go b/triedb/pathdb/history_index_iterator.go index 1ccb39ad09..076baaa9e5 100644 --- a/triedb/pathdb/history_index_iterator.go +++ b/triedb/pathdb/history_index_iterator.go @@ -40,31 +40,133 @@ type HistoryIndexIterator interface { Error() error } +// extFilter provides utilities for filtering index entries based on their +// extension field. 
+//
+// It supports two primary operations:
+//
+//   - determine whether a given target node ID or any of its descendants
+//     appears explicitly in the extension list.
+//
+//   - determine whether a given target node ID or any of its descendants
+//     is marked in the extension bitmap.
+//
+// Together, these checks allow callers to efficiently filter out the irrelevant
+// index entries during the lookup.
+type extFilter uint16
+
+// exists takes the entire extension field in the index block and determines
+// whether the target ID or its descendants appears. Note, any of descendant
+// can implicitly mean the presence of ancestor.
+func (f extFilter) exists(ext []byte) (bool, error) {
+	fn := uint16(f)
+	list, err := decodeIDs(ext)
+	if err != nil {
+		return false, err
+	}
+	for _, elem := range list {
+		if elem == fn {
+			return true, nil
+		}
+		if isAncestor(fn, elem) {
+			return true, nil
+		}
+	}
+	return false, nil
+}
+
+const (
+	// bitmapBytesTwoLevels is the size of the bitmap for two levels of the
+	// 16-ary tree (16 nodes total, excluding the root).
+	bitmapBytesTwoLevels = 2
+
+	// bitmapBytesThreeLevels is the size of the bitmap for three levels of
+	// the 16-ary tree (272 nodes total, excluding the root).
+	bitmapBytesThreeLevels = 34
+
+	// bitmapElementThresholdTwoLevels is the total number of elements in the
+	// two levels of a 16-ary tree (16 nodes total, excluding the root).
+	bitmapElementThresholdTwoLevels = 16
+
+	// bitmapElementThresholdThreeLevels is the total number of elements in the
+	// three levels of a 16-ary tree (272 nodes total, excluding the root).
+	bitmapElementThresholdThreeLevels = bitmapElementThresholdTwoLevels + 16*16
+)
+
+// contains takes the bitmap from the block metadata and determines whether the
+// target ID or its descendants is marked in the bitmap. Note, any of descendant
+// can implicitly mean the presence of ancestor.
+func (f extFilter) contains(bitmap []byte) (bool, error) { + id := int(f) + if id == 0 { + return true, nil + } + n := id - 1 // apply the position shift for excluding root node + + switch len(bitmap) { + case 0: + // Bitmap is not available, return "false positive" + return true, nil + case bitmapBytesTwoLevels: + // Bitmap for 2-level trie with at most 16 elements inside + if n >= bitmapElementThresholdTwoLevels { + return false, fmt.Errorf("invalid extension filter %d for 2 bytes bitmap", id) + } + return isBitSet(bitmap, n), nil + case bitmapBytesThreeLevels: + // Bitmap for 3-level trie with at most 16+16*16 elements inside + if n >= bitmapElementThresholdThreeLevels { + return false, fmt.Errorf("invalid extension filter %d for 34 bytes bitmap", id) + } else if n >= bitmapElementThresholdTwoLevels { + return isBitSet(bitmap, n), nil + } else { + // Check the element itself first + if isBitSet(bitmap, n) { + return true, nil + } + // Check descendants: the presence of any descendant implicitly + // represents a mutation of its ancestor. + return bitmap[2+2*n] != 0 || bitmap[3+2*n] != 0, nil + } + default: + return false, fmt.Errorf("unsupported bitmap size %d", len(bitmap)) + } +} + // blockIterator is the iterator to traverse the indices within a single block. type blockIterator struct { // immutable fields data []byte // Reference to the data segment within the block reader restarts []uint16 // Offsets pointing to the restart sections within the data + hasExt bool // Flag whether the extension is included in the data + + // Optional extension filter + filter *extFilter // Filters index entries based on the extension field. 
// mutable fields id uint64 // ID of the element at the iterators current position + ext []byte // Extension field of the element at the iterators current position dataPtr int // Current read position within the data slice restartPtr int // Index of the restart section where the iterator is currently positioned exhausted bool // Flag whether the iterator has been exhausted err error // Accumulated error during the traversal } -func newBlockIterator(data []byte, restarts []uint16) *blockIterator { +func (br *blockReader) newIterator(filter *extFilter) *blockIterator { it := &blockIterator{ - data: data, // hold the slice directly with no deep copy - restarts: restarts, // hold the slice directly with no deep copy + data: br.data, // hold the slice directly with no deep copy + restarts: br.restarts, // hold the slice directly with no deep copy + hasExt: br.hasExt, // flag whether the extension should be resolved + filter: filter, // optional extension filter } it.reset() return it } -func (it *blockIterator) set(dataPtr int, restartPtr int, id uint64) { +func (it *blockIterator) set(dataPtr int, restartPtr int, id uint64, ext []byte) { it.id = id + it.ext = ext + it.dataPtr = dataPtr it.restartPtr = restartPtr it.exhausted = dataPtr == len(it.data) @@ -79,6 +181,8 @@ func (it *blockIterator) setErr(err error) { func (it *blockIterator) reset() { it.id = 0 + it.ext = nil + it.dataPtr = -1 it.restartPtr = -1 it.exhausted = false @@ -90,12 +194,26 @@ func (it *blockIterator) reset() { } } -// SeekGT moves the iterator to the first element whose id is greater than the +func (it *blockIterator) resolveExt(pos int) ([]byte, int, error) { + if !it.hasExt { + return nil, 0, nil + } + length, n := binary.Uvarint(it.data[pos:]) + if n <= 0 { + return nil, 0, fmt.Errorf("too short for extension, pos: %d, datalen: %d", pos, len(it.data)) + } + if len(it.data[pos+n:]) < int(length) { + return nil, 0, fmt.Errorf("too short for extension, pos: %d, length: %d, datalen: %d", pos, 
length, len(it.data)) + } + return it.data[pos+n : pos+n+int(length)], n + int(length), nil +} + +// seekGT moves the iterator to the first element whose id is greater than the // given number. It returns whether such element exists. // // Note, this operation will unset the exhausted status and subsequent traversal // is allowed. -func (it *blockIterator) SeekGT(id uint64) bool { +func (it *blockIterator) seekGT(id uint64) bool { if it.err != nil { return false } @@ -112,11 +230,20 @@ func (it *blockIterator) SeekGT(id uint64) bool { return false } if index == 0 { - item, n := binary.Uvarint(it.data[it.restarts[0]:]) + pos := int(it.restarts[0]) + item, n := binary.Uvarint(it.data[pos:]) + if n <= 0 { + it.setErr(fmt.Errorf("failed to decode item at pos %d", it.restarts[0])) + return false + } + pos = pos + n - // If the restart size is 1, then the restart pointer shouldn't be 0. - // It's not practical and should be denied in the first place. - it.set(int(it.restarts[0])+n, 0, item) + ext, shift, err := it.resolveExt(pos) + if err != nil { + it.setErr(err) + return false + } + it.set(pos+shift, 0, item, ext) return true } var ( @@ -154,11 +281,18 @@ func (it *blockIterator) SeekGT(id uint64) bool { } pos += n + ext, shift, err := it.resolveExt(pos) + if err != nil { + it.setErr(err) + return false + } + pos += shift + if result > id { if pos == limit { - it.set(pos, restartIndex+1, result) + it.set(pos, restartIndex+1, result, ext) } else { - it.set(pos, restartIndex, result) + it.set(pos, restartIndex, result, ext) } return true } @@ -170,8 +304,45 @@ func (it *blockIterator) SeekGT(id uint64) bool { } // The element which is the first one greater than the specified id // is exactly the one located at the restart point. 
- item, n := binary.Uvarint(it.data[it.restarts[index]:]) - it.set(int(it.restarts[index])+n, index, item) + pos = int(it.restarts[index]) + item, n := binary.Uvarint(it.data[pos:]) + if n <= 0 { + it.setErr(fmt.Errorf("failed to decode item at pos %d", it.restarts[index])) + return false + } + pos = pos + n + + ext, shift, err := it.resolveExt(pos) + if err != nil { + it.setErr(err) + return false + } + it.set(pos+shift, index, item, ext) + return true +} + +// SeekGT implements HistoryIndexIterator, is the wrapper of the seekGT with +// optional extension filter logic applied. +func (it *blockIterator) SeekGT(id uint64) bool { + if !it.seekGT(id) { + return false + } + if it.filter == nil { + return true + } + for { + found, err := it.filter.exists(it.ext) + if err != nil { + it.setErr(err) + return false + } + if found { + break + } + if !it.next() { + return false + } + } return true } @@ -183,10 +354,9 @@ func (it *blockIterator) init() { it.restartPtr = 0 } -// Next implements the HistoryIndexIterator, moving the iterator to the next -// element. If the iterator has been exhausted, and boolean with false should -// be returned. -func (it *blockIterator) Next() bool { +// next moves the iterator to the next element. If the iterator has been exhausted, +// and boolean with false should be returned. 
+func (it *blockIterator) next() bool { if it.exhausted || it.err != nil { return false } @@ -198,7 +368,6 @@ func (it *blockIterator) Next() bool { it.setErr(fmt.Errorf("failed to decode item at pos %d", it.dataPtr)) return false } - var val uint64 if it.dataPtr == int(it.restarts[it.restartPtr]) { val = v @@ -206,16 +375,48 @@ func (it *blockIterator) Next() bool { val = it.id + v } + // Decode the extension field + ext, shift, err := it.resolveExt(it.dataPtr + n) + if err != nil { + it.setErr(err) + return false + } + // Move to the next restart section if the data pointer crosses the boundary nextRestartPtr := it.restartPtr - if it.restartPtr < len(it.restarts)-1 && it.dataPtr+n == int(it.restarts[it.restartPtr+1]) { + if it.restartPtr < len(it.restarts)-1 && it.dataPtr+n+shift == int(it.restarts[it.restartPtr+1]) { nextRestartPtr = it.restartPtr + 1 } - it.set(it.dataPtr+n, nextRestartPtr, val) + it.set(it.dataPtr+n+shift, nextRestartPtr, val, ext) return true } +// Next implements the HistoryIndexIterator, moving the iterator to the next +// element. It's a wrapper of next with optional extension filter logic applied. +func (it *blockIterator) Next() bool { + if !it.next() { + return false + } + if it.filter == nil { + return true + } + for { + found, err := it.filter.exists(it.ext) + if err != nil { + it.setErr(err) + return false + } + if found { + break + } + if !it.next() { + return false + } + } + return true +} + // ID implements HistoryIndexIterator, returning the id of the element where the // iterator is positioned at. func (it *blockIterator) ID() uint64 { @@ -226,15 +427,15 @@ func (it *blockIterator) ID() uint64 { // Exhausting all the elements is not considered to be an error. func (it *blockIterator) Error() error { return it.err } -// blockLoader defines the method to retrieve the specific block for reading. 
-type blockLoader func(id uint32) (*blockReader, error) - // indexIterator is an iterator to traverse the history indices belonging to the // specific state entry. type indexIterator struct { // immutable fields descList []*indexBlockDesc - loader blockLoader + reader *indexReader + + // Optional extension filter + filter *extFilter // mutable fields blockIt *blockIterator @@ -243,10 +444,26 @@ type indexIterator struct { err error } -func newIndexIterator(descList []*indexBlockDesc, loader blockLoader) *indexIterator { +// newBlockIter initializes the block iterator with the specified block ID. +func (r *indexReader) newBlockIter(id uint32, filter *extFilter) (*blockIterator, error) { + br, ok := r.readers[id] + if !ok { + var err error + br, err = newBlockReader(readStateIndexBlock(r.state, r.db, id), r.bitmapSize != 0) + if err != nil { + return nil, err + } + r.readers[id] = br + } + return br.newIterator(filter), nil +} + +// newIterator initializes the index iterator with the specified extension filter. 
+func (r *indexReader) newIterator(filter *extFilter) *indexIterator { it := &indexIterator{ - descList: descList, - loader: loader, + descList: r.descList, + reader: r, + filter: filter, } it.reset() return it @@ -271,16 +488,32 @@ func (it *indexIterator) reset() { } func (it *indexIterator) open(blockPtr int) error { - id := it.descList[blockPtr].id - br, err := it.loader(id) + blockIt, err := it.reader.newBlockIter(it.descList[blockPtr].id, it.filter) if err != nil { return err } - it.blockIt = newBlockIterator(br.data, br.restarts) + it.blockIt = blockIt it.blockPtr = blockPtr return nil } +func (it *indexIterator) applyFilter(index int) (int, error) { + if it.filter == nil { + return index, nil + } + for index < len(it.descList) { + found, err := it.filter.contains(it.descList[index].extBitmap) + if err != nil { + return 0, err + } + if found { + break + } + index++ + } + return index, nil +} + // SeekGT moves the iterator to the first element whose id is greater than the // given number. It returns whether such element exists. // @@ -293,6 +526,11 @@ func (it *indexIterator) SeekGT(id uint64) bool { index := sort.Search(len(it.descList), func(i int) bool { return id < it.descList[i].max }) + index, err := it.applyFilter(index) + if err != nil { + it.setErr(err) + return false + } if index == len(it.descList) { return false } @@ -304,7 +542,13 @@ func (it *indexIterator) SeekGT(id uint64) bool { return false } } - return it.blockIt.SeekGT(id) + // Terminate if the element which is greater than the id can be found in the + // last block; otherwise move to the next block. It may happen that all the + // target elements in this block are all less than id. 
+ if it.blockIt.SeekGT(id) { + return true + } + return it.Next() } func (it *indexIterator) init() error { @@ -325,15 +569,23 @@ func (it *indexIterator) Next() bool { it.setErr(err) return false } - if it.blockIt.Next() { return true } - if it.blockPtr == len(it.descList)-1 { + it.blockPtr++ + + index, err := it.applyFilter(it.blockPtr) + if err != nil { + it.setErr(err) + return false + } + it.blockPtr = index + + if it.blockPtr == len(it.descList) { it.exhausted = true return false } - if err := it.open(it.blockPtr + 1); err != nil { + if err := it.open(it.blockPtr); err != nil { it.setErr(err) return false } diff --git a/triedb/pathdb/history_index_iterator_test.go b/triedb/pathdb/history_index_iterator_test.go index f0dd3fee4a..8b7591ce26 100644 --- a/triedb/pathdb/history_index_iterator_test.go +++ b/triedb/pathdb/history_index_iterator_test.go @@ -19,7 +19,9 @@ package pathdb import ( "errors" "fmt" + "maps" "math/rand" + "slices" "sort" "testing" @@ -28,12 +30,30 @@ import ( "github.com/ethereum/go-ethereum/ethdb" ) -func makeTestIndexBlock(count int) ([]byte, []uint64) { +func checkExt(f *extFilter, ext []uint16) bool { + if f == nil { + return true + } + fn := uint16(*f) + + for _, n := range ext { + if n == fn { + return true + } + if isAncestor(fn, n) { + return true + } + } + return false +} + +func makeTestIndexBlock(count int, bitmapSize int) ([]byte, []uint64, [][]uint16) { var ( marks = make(map[uint64]bool) - elements []uint64 + elements = make([]uint64, 0, count) + extList = make([][]uint16, 0, count) ) - bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0) + bw, _ := newBlockWriter(nil, newIndexBlockDesc(0, bitmapSize), 0, bitmapSize != 0) for i := 0; i < count; i++ { n := uint64(rand.Uint32()) if marks[n] { @@ -45,17 +65,20 @@ func makeTestIndexBlock(count int) ([]byte, []uint64) { sort.Slice(elements, func(i, j int) bool { return elements[i] < elements[j] }) for i := 0; i < len(elements); i++ { - bw.append(elements[i]) + ext := 
randomExt(bitmapSize, 5) + extList = append(extList, ext) + bw.append(elements[i], ext) } data := bw.finish() - return data, elements + return data, elements, extList } -func makeTestIndexBlocks(db ethdb.KeyValueStore, stateIdent stateIdent, count int) []uint64 { +func makeTestIndexBlocks(db ethdb.KeyValueStore, stateIdent stateIdent, count int, bitmapSize int) ([]uint64, [][]uint16) { var ( marks = make(map[uint64]bool) elements []uint64 + extList [][]uint16 ) for i := 0; i < count; i++ { n := uint64(rand.Uint32()) @@ -67,15 +90,17 @@ func makeTestIndexBlocks(db ethdb.KeyValueStore, stateIdent stateIdent, count in } sort.Slice(elements, func(i, j int) bool { return elements[i] < elements[j] }) - iw, _ := newIndexWriter(db, stateIdent, 0) + iw, _ := newIndexWriter(db, stateIdent, 0, bitmapSize) for i := 0; i < len(elements); i++ { - iw.append(elements[i]) + ext := randomExt(bitmapSize, 5) + extList = append(extList, ext) + iw.append(elements[i], ext) } batch := db.NewBatch() iw.finish(batch) batch.Write() - return elements + return elements, extList } func checkSeekGT(it HistoryIndexIterator, input uint64, exp bool, expVal uint64) error { @@ -113,43 +138,40 @@ func checkNext(it HistoryIndexIterator, values []uint64) error { return it.Error() } -func TestBlockIteratorSeekGT(t *testing.T) { - /* 0-size index block is not allowed - - data, elements := makeTestIndexBlock(0) - testBlockIterator(t, data, elements) - */ - - data, elements := makeTestIndexBlock(1) - testBlockIterator(t, data, elements) - - data, elements = makeTestIndexBlock(indexBlockRestartLen) - testBlockIterator(t, data, elements) - - data, elements = makeTestIndexBlock(3 * indexBlockRestartLen) - testBlockIterator(t, data, elements) - - data, elements = makeTestIndexBlock(indexBlockEntriesCap) - testBlockIterator(t, data, elements) -} - -func testBlockIterator(t *testing.T, data []byte, elements []uint64) { - br, err := newBlockReader(data) - if err != nil { - t.Fatalf("Failed to open the block for 
reading, %v", err) +func verifySeekGT(t *testing.T, elements []uint64, ext [][]uint16, newIter func(filter *extFilter) HistoryIndexIterator) { + set := make(map[extFilter]bool) + for _, extList := range ext { + for _, f := range extList { + set[extFilter(f)] = true + } } - it := newBlockIterator(br.data, br.restarts) + filters := slices.Collect(maps.Keys(set)) for i := 0; i < 128; i++ { + var filter *extFilter + if rand.Intn(2) == 0 && len(filters) > 0 { + filter = &filters[rand.Intn(len(filters))] + } else { + filter = nil + } + var input uint64 if rand.Intn(2) == 0 { input = elements[rand.Intn(len(elements))] } else { input = uint64(rand.Uint32()) } + index := sort.Search(len(elements), func(i int) bool { return elements[i] > input }) + for index < len(elements) { + if checkExt(filter, ext[index]) { + break + } + index++ + } + var ( exp bool expVal uint64 @@ -160,10 +182,17 @@ func testBlockIterator(t *testing.T, data []byte, elements []uint64) { } else { exp = true expVal = elements[index] - if index < len(elements) { - remains = elements[index+1:] + + index++ + for index < len(elements) { + if checkExt(filter, ext[index]) { + remains = append(remains, elements[index]) + } + index++ } } + + it := newIter(filter) if err := checkSeekGT(it, input, exp, expVal); err != nil { t.Fatal(err) } @@ -175,62 +204,71 @@ func testBlockIterator(t *testing.T, data []byte, elements []uint64) { } } +func verifyTraversal(t *testing.T, elements []uint64, ext [][]uint16, newIter func(filter *extFilter) HistoryIndexIterator) { + set := make(map[extFilter]bool) + for _, extList := range ext { + for _, f := range extList { + set[extFilter(f)] = true + } + } + filters := slices.Collect(maps.Keys(set)) + + for i := 0; i < 16; i++ { + var filter *extFilter + if len(filters) > 0 { + filter = &filters[rand.Intn(len(filters))] + } else { + filter = nil + } + it := newIter(filter) + + var ( + pos int + exp []uint64 + ) + for pos < len(elements) { + if checkExt(filter, ext[pos]) { + exp = 
append(exp, elements[pos]) + } + pos++ + } + if err := checkNext(it, exp); err != nil { + t.Fatal(err) + } + } +} + +func TestBlockIteratorSeekGT(t *testing.T) { + for _, size := range []int{0, 2, 34} { + for _, n := range []int{1, indexBlockRestartLen, 3 * indexBlockRestartLen} { + data, elements, ext := makeTestIndexBlock(n, size) + + verifySeekGT(t, elements, ext, func(filter *extFilter) HistoryIndexIterator { + br, err := newBlockReader(data, size != 0) + if err != nil { + t.Fatalf("Failed to open the block for reading, %v", err) + } + return br.newIterator(filter) + }) + } + } +} + func TestIndexIteratorSeekGT(t *testing.T) { ident := newAccountIdent(common.Hash{0x1}) - dbA := rawdb.NewMemoryDatabase() - testIndexIterator(t, ident, dbA, makeTestIndexBlocks(dbA, ident, 1)) + for _, size := range []int{0, 2, 34} { + for _, n := range []int{1, 4096, 3 * 4096} { + db := rawdb.NewMemoryDatabase() + elements, ext := makeTestIndexBlocks(db, ident, n, size) - dbB := rawdb.NewMemoryDatabase() - testIndexIterator(t, ident, dbB, makeTestIndexBlocks(dbB, ident, 3*indexBlockEntriesCap)) - - dbC := rawdb.NewMemoryDatabase() - testIndexIterator(t, ident, dbC, makeTestIndexBlocks(dbC, ident, indexBlockEntriesCap-1)) - - dbD := rawdb.NewMemoryDatabase() - testIndexIterator(t, ident, dbD, makeTestIndexBlocks(dbD, ident, indexBlockEntriesCap+1)) -} - -func testIndexIterator(t *testing.T, stateIdent stateIdent, db ethdb.Database, elements []uint64) { - ir, err := newIndexReader(db, stateIdent) - if err != nil { - t.Fatalf("Failed to open the index reader, %v", err) - } - it := newIndexIterator(ir.descList, func(id uint32) (*blockReader, error) { - return newBlockReader(readStateIndexBlock(stateIdent, db, id)) - }) - - for i := 0; i < 128; i++ { - var input uint64 - if rand.Intn(2) == 0 { - input = elements[rand.Intn(len(elements))] - } else { - input = uint64(rand.Uint32()) - } - index := sort.Search(len(elements), func(i int) bool { - return elements[i] > input - }) - var ( - 
exp bool - expVal uint64 - remains []uint64 - ) - if index == len(elements) { - exp = false - } else { - exp = true - expVal = elements[index] - if index < len(elements) { - remains = elements[index+1:] - } - } - if err := checkSeekGT(it, input, exp, expVal); err != nil { - t.Fatal(err) - } - if exp { - if err := checkNext(it, remains); err != nil { - t.Fatal(err) - } + verifySeekGT(t, elements, ext, func(filter *extFilter) HistoryIndexIterator { + ir, err := newIndexReader(db, ident, size) + if err != nil { + t.Fatalf("Failed to open the index reader, %v", err) + } + return ir.newIterator(filter) + }) } } } @@ -242,56 +280,36 @@ func TestBlockIteratorTraversal(t *testing.T) { testBlockIterator(t, data, elements) */ - data, elements := makeTestIndexBlock(1) - testBlockIteratorTraversal(t, data, elements) + for _, size := range []int{0, 2, 34} { + for _, n := range []int{1, indexBlockRestartLen, 3 * indexBlockRestartLen} { + data, elements, ext := makeTestIndexBlock(n, size) - data, elements = makeTestIndexBlock(indexBlockRestartLen) - testBlockIteratorTraversal(t, data, elements) - - data, elements = makeTestIndexBlock(3 * indexBlockRestartLen) - testBlockIteratorTraversal(t, data, elements) - - data, elements = makeTestIndexBlock(indexBlockEntriesCap) - testBlockIteratorTraversal(t, data, elements) -} - -func testBlockIteratorTraversal(t *testing.T, data []byte, elements []uint64) { - br, err := newBlockReader(data) - if err != nil { - t.Fatalf("Failed to open the block for reading, %v", err) - } - it := newBlockIterator(br.data, br.restarts) - - if err := checkNext(it, elements); err != nil { - t.Fatal(err) + verifyTraversal(t, elements, ext, func(filter *extFilter) HistoryIndexIterator { + br, err := newBlockReader(data, size != 0) + if err != nil { + t.Fatalf("Failed to open the block for reading, %v", err) + } + return br.newIterator(filter) + }) + } } } func TestIndexIteratorTraversal(t *testing.T) { ident := newAccountIdent(common.Hash{0x1}) - dbA := 
rawdb.NewMemoryDatabase() - testIndexIteratorTraversal(t, ident, dbA, makeTestIndexBlocks(dbA, ident, 1)) + for _, size := range []int{0, 2, 34} { + for _, n := range []int{1, 4096, 3 * 4096} { + db := rawdb.NewMemoryDatabase() + elements, ext := makeTestIndexBlocks(db, ident, n, size) - dbB := rawdb.NewMemoryDatabase() - testIndexIteratorTraversal(t, ident, dbB, makeTestIndexBlocks(dbB, ident, 3*indexBlockEntriesCap)) - - dbC := rawdb.NewMemoryDatabase() - testIndexIteratorTraversal(t, ident, dbC, makeTestIndexBlocks(dbC, ident, indexBlockEntriesCap-1)) - - dbD := rawdb.NewMemoryDatabase() - testIndexIteratorTraversal(t, ident, dbD, makeTestIndexBlocks(dbD, ident, indexBlockEntriesCap+1)) -} - -func testIndexIteratorTraversal(t *testing.T, stateIdent stateIdent, db ethdb.KeyValueReader, elements []uint64) { - ir, err := newIndexReader(db, stateIdent) - if err != nil { - t.Fatalf("Failed to open the index reader, %v", err) - } - it := newIndexIterator(ir.descList, func(id uint32) (*blockReader, error) { - return newBlockReader(readStateIndexBlock(stateIdent, db, id)) - }) - if err := checkNext(it, elements); err != nil { - t.Fatal(err) + verifyTraversal(t, elements, ext, func(filter *extFilter) HistoryIndexIterator { + ir, err := newIndexReader(db, ident, size) + if err != nil { + t.Fatalf("Failed to open the index reader, %v", err) + } + return ir.newIterator(filter) + }) + } } } diff --git a/triedb/pathdb/history_index_test.go b/triedb/pathdb/history_index_test.go index 42cb04b001..2644db46b5 100644 --- a/triedb/pathdb/history_index_test.go +++ b/triedb/pathdb/history_index_test.go @@ -29,19 +29,25 @@ import ( ) func TestIndexReaderBasic(t *testing.T) { + testIndexReaderBasic(t, 0) + testIndexReaderBasic(t, 2) + testIndexReaderBasic(t, 34) +} + +func testIndexReaderBasic(t *testing.T, bitmapSize int) { elements := []uint64{ 1, 5, 10, 11, 20, } db := rawdb.NewMemoryDatabase() - bw, _ := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), 0) + bw, _ := 
newIndexWriter(db, newAccountIdent(common.Hash{0xa}), 0, bitmapSize) for i := 0; i < len(elements); i++ { - bw.append(elements[i]) + bw.append(elements[i], randomExt(bitmapSize, 5)) } batch := db.NewBatch() bw.finish(batch) batch.Write() - br, err := newIndexReader(db, newAccountIdent(common.Hash{0xa})) + br, err := newIndexReader(db, newAccountIdent(common.Hash{0xa}), bitmapSize) if err != nil { t.Fatalf("Failed to construct the index reader, %v", err) } @@ -68,22 +74,28 @@ func TestIndexReaderBasic(t *testing.T) { } func TestIndexReaderLarge(t *testing.T) { + testIndexReaderLarge(t, 0) + testIndexReaderLarge(t, 2) + testIndexReaderLarge(t, 34) +} + +func testIndexReaderLarge(t *testing.T, bitmapSize int) { var elements []uint64 - for i := 0; i < 10*indexBlockEntriesCap; i++ { + for i := 0; i < 10*4096; i++ { elements = append(elements, rand.Uint64()) } slices.Sort(elements) db := rawdb.NewMemoryDatabase() - bw, _ := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), 0) + bw, _ := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), 0, bitmapSize) for i := 0; i < len(elements); i++ { - bw.append(elements[i]) + bw.append(elements[i], randomExt(bitmapSize, 5)) } batch := db.NewBatch() bw.finish(batch) batch.Write() - br, err := newIndexReader(db, newAccountIdent(common.Hash{0xa})) + br, err := newIndexReader(db, newAccountIdent(common.Hash{0xa}), bitmapSize) if err != nil { t.Fatalf("Failed to construct the index reader, %v", err) } @@ -107,7 +119,7 @@ func TestIndexReaderLarge(t *testing.T) { } func TestEmptyIndexReader(t *testing.T) { - br, err := newIndexReader(rawdb.NewMemoryDatabase(), newAccountIdent(common.Hash{0xa})) + br, err := newIndexReader(rawdb.NewMemoryDatabase(), newAccountIdent(common.Hash{0xa}), 0) if err != nil { t.Fatalf("Failed to construct the index reader, %v", err) } @@ -121,27 +133,33 @@ func TestEmptyIndexReader(t *testing.T) { } func TestIndexWriterBasic(t *testing.T) { + testIndexWriterBasic(t, 0) + testIndexWriterBasic(t, 2) + 
testIndexWriterBasic(t, 34) +} + +func testIndexWriterBasic(t *testing.T, bitmapSize int) { db := rawdb.NewMemoryDatabase() - iw, _ := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), 0) - iw.append(2) - if err := iw.append(1); err == nil { + iw, _ := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), 0, bitmapSize) + iw.append(2, randomExt(bitmapSize, 5)) + if err := iw.append(1, randomExt(bitmapSize, 5)); err == nil { t.Fatal("out-of-order insertion is not expected") } var maxElem uint64 for i := 0; i < 10; i++ { - iw.append(uint64(i + 3)) + iw.append(uint64(i+3), randomExt(bitmapSize, 5)) maxElem = uint64(i + 3) } batch := db.NewBatch() iw.finish(batch) batch.Write() - iw, err := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), maxElem) + iw, err := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), maxElem, bitmapSize) if err != nil { t.Fatalf("Failed to construct the block writer, %v", err) } for i := 0; i < 10; i++ { - if err := iw.append(uint64(i + 100)); err != nil { + if err := iw.append(uint64(i+100), randomExt(bitmapSize, 5)); err != nil { t.Fatalf("Failed to append item, %v", err) } } @@ -149,61 +167,37 @@ func TestIndexWriterBasic(t *testing.T) { } func TestIndexWriterWithLimit(t *testing.T) { - db := rawdb.NewMemoryDatabase() - iw, _ := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), 0) + testIndexWriterWithLimit(t, 0) + testIndexWriterWithLimit(t, 2) + testIndexWriterWithLimit(t, 34) +} - var maxElem uint64 - for i := 0; i < indexBlockEntriesCap*2; i++ { - iw.append(uint64(i + 1)) - maxElem = uint64(i + 1) +func testIndexWriterWithLimit(t *testing.T, bitmapSize int) { + db := rawdb.NewMemoryDatabase() + iw, _ := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), 0, bitmapSize) + + // 200 iterations (with around 50 bytes extension) is enough to cross + // the block boundary (4096 bytes) + for i := 0; i < 200; i++ { + iw.append(uint64(i+1), randomExt(bitmapSize, 50)) } batch := db.NewBatch() iw.finish(batch) batch.Write() - 
suites := []struct { - limit uint64 - expMax uint64 - }{ - // nothing to truncate - { - maxElem, maxElem, - }, - // truncate the last element - { - maxElem - 1, maxElem - 1, - }, - // truncation around the block boundary - { - uint64(indexBlockEntriesCap + 1), - uint64(indexBlockEntriesCap + 1), - }, - // truncation around the block boundary - { - uint64(indexBlockEntriesCap), - uint64(indexBlockEntriesCap), - }, - { - uint64(1), uint64(1), - }, - // truncate the entire index, it's in theory invalid - { - uint64(0), uint64(0), - }, - } - for i, suite := range suites { - iw, err := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), suite.limit) + for i := 0; i < 200; i++ { + limit := uint64(i + 1) + iw, err := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), limit, bitmapSize) if err != nil { t.Fatalf("Failed to construct the index writer, %v", err) } - if iw.lastID != suite.expMax { - t.Fatalf("Test %d, unexpected max value, got %d, want %d", i, iw.lastID, suite.expMax) + if iw.lastID != limit { + t.Fatalf("Test %d, unexpected max value, got %d, want %d", i, iw.lastID, limit) } - // Re-fill the elements var maxElem uint64 - for elem := suite.limit + 1; elem < indexBlockEntriesCap*4; elem++ { - if err := iw.append(elem); err != nil { + for elem := limit + 1; elem < 500; elem++ { + if err := iw.append(elem, randomExt(bitmapSize, 5)); err != nil { t.Fatalf("Failed to append value %d: %v", elem, err) } maxElem = elem @@ -215,12 +209,20 @@ func TestIndexWriterWithLimit(t *testing.T) { } func TestIndexDeleterBasic(t *testing.T) { - db := rawdb.NewMemoryDatabase() - iw, _ := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), 0) + testIndexDeleterBasic(t, 0) + testIndexDeleterBasic(t, 2) + testIndexDeleterBasic(t, 34) +} +func testIndexDeleterBasic(t *testing.T, bitmapSize int) { + db := rawdb.NewMemoryDatabase() + iw, _ := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), 0, bitmapSize) + + // 200 iterations (with around 50 bytes extension) is enough to 
cross + // the block boundary (4096 bytes) var maxElem uint64 - for i := 0; i < indexBlockEntriesCap*4; i++ { - iw.append(uint64(i + 1)) + for i := 0; i < 200; i++ { + iw.append(uint64(i+1), randomExt(bitmapSize, 50)) maxElem = uint64(i + 1) } batch := db.NewBatch() @@ -228,11 +230,11 @@ func TestIndexDeleterBasic(t *testing.T) { batch.Write() // Delete unknown id, the request should be rejected - id, _ := newIndexDeleter(db, newAccountIdent(common.Hash{0xa}), maxElem) - if err := id.pop(indexBlockEntriesCap * 5); err == nil { + id, _ := newIndexDeleter(db, newAccountIdent(common.Hash{0xa}), maxElem, bitmapSize) + if err := id.pop(500); err == nil { t.Fatal("Expect error to occur for unknown id") } - for i := indexBlockEntriesCap * 4; i >= 1; i-- { + for i := 200; i >= 1; i-- { if err := id.pop(uint64(i)); err != nil { t.Fatalf("Unexpected error for element popping, %v", err) } @@ -243,57 +245,33 @@ func TestIndexDeleterBasic(t *testing.T) { } func TestIndexDeleterWithLimit(t *testing.T) { - db := rawdb.NewMemoryDatabase() - iw, _ := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), 0) + testIndexDeleterWithLimit(t, 0) + testIndexDeleterWithLimit(t, 2) + testIndexDeleterWithLimit(t, 34) +} - var maxElem uint64 - for i := 0; i < indexBlockEntriesCap*2; i++ { - iw.append(uint64(i + 1)) - maxElem = uint64(i + 1) +func testIndexDeleterWithLimit(t *testing.T, bitmapSize int) { + db := rawdb.NewMemoryDatabase() + iw, _ := newIndexWriter(db, newAccountIdent(common.Hash{0xa}), 0, bitmapSize) + + // 200 iterations (with around 50 bytes extension) is enough to cross + // the block boundary (4096 bytes) + for i := 0; i < 200; i++ { + iw.append(uint64(i+1), randomExt(bitmapSize, 50)) } batch := db.NewBatch() iw.finish(batch) batch.Write() - suites := []struct { - limit uint64 - expMax uint64 - }{ - // nothing to truncate - { - maxElem, maxElem, - }, - // truncate the last element - { - maxElem - 1, maxElem - 1, - }, - // truncation around the block boundary - { - 
uint64(indexBlockEntriesCap + 1), - uint64(indexBlockEntriesCap + 1), - }, - // truncation around the block boundary - { - uint64(indexBlockEntriesCap), - uint64(indexBlockEntriesCap), - }, - { - uint64(1), uint64(1), - }, - // truncate the entire index, it's in theory invalid - { - uint64(0), uint64(0), - }, - } - for i, suite := range suites { - id, err := newIndexDeleter(db, newAccountIdent(common.Hash{0xa}), suite.limit) + for i := 0; i < 200; i++ { + limit := uint64(i + 1) + id, err := newIndexDeleter(db, newAccountIdent(common.Hash{0xa}), limit, bitmapSize) if err != nil { t.Fatalf("Failed to construct the index writer, %v", err) } - if id.lastID != suite.expMax { - t.Fatalf("Test %d, unexpected max value, got %d, want %d", i, id.lastID, suite.expMax) + if id.lastID != limit { + t.Fatalf("Test %d, unexpected max value, got %d, want %d", i, id.lastID, limit) } - // Keep removing elements for elem := id.lastID; elem > 0; elem-- { if err := id.pop(elem); err != nil { @@ -339,7 +317,7 @@ func TestBatchIndexerWrite(t *testing.T) { } } for addrHash, indexes := range accounts { - ir, _ := newIndexReader(db, newAccountIdent(addrHash)) + ir, _ := newIndexReader(db, newAccountIdent(addrHash), 0) for i := 0; i < len(indexes)-1; i++ { n, err := ir.readGreaterThan(indexes[i]) if err != nil { @@ -359,7 +337,7 @@ func TestBatchIndexerWrite(t *testing.T) { } for addrHash, slots := range storages { for slotHash, indexes := range slots { - ir, _ := newIndexReader(db, newStorageIdent(addrHash, slotHash)) + ir, _ := newIndexReader(db, newStorageIdent(addrHash, slotHash), 0) for i := 0; i < len(indexes)-1; i++ { n, err := ir.readGreaterThan(indexes[i]) if err != nil { diff --git a/triedb/pathdb/history_indexer.go b/triedb/pathdb/history_indexer.go index 9af7a96dc6..ddb4a293cc 100644 --- a/triedb/pathdb/history_indexer.go +++ b/triedb/pathdb/history_indexer.go @@ -34,7 +34,8 @@ import ( const ( // The batch size for reading state histories - historyReadBatch = 1000 + 
historyReadBatch = 1000 + historyIndexBatch = 8 * 1024 * 1024 // The number of state history indexes for constructing or deleting as batch stateHistoryIndexV0 = uint8(0) // initial version of state index structure stateHistoryIndexVersion = stateHistoryIndexV0 // the current state index version @@ -191,12 +192,12 @@ func (b *batchIndexer) finish(force bool) error { for ident, list := range b.index { eg.Go(func() error { if !b.delete { - iw, err := newIndexWriter(b.db, ident, indexed) + iw, err := newIndexWriter(b.db, ident, indexed, 0) if err != nil { return err } for _, n := range list { - if err := iw.append(n); err != nil { + if err := iw.append(n, nil); err != nil { return err } } @@ -204,7 +205,7 @@ func (b *batchIndexer) finish(force bool) error { iw.finish(batch) }) } else { - id, err := newIndexDeleter(b.db, ident, indexed) + id, err := newIndexDeleter(b.db, ident, indexed, 0) if err != nil { return err } diff --git a/triedb/pathdb/history_reader.go b/triedb/pathdb/history_reader.go index 1bf4cf648d..69e7d5bd22 100644 --- a/triedb/pathdb/history_reader.go +++ b/triedb/pathdb/history_reader.go @@ -40,8 +40,8 @@ type indexReaderWithLimitTag struct { } // newIndexReaderWithLimitTag constructs a index reader with indexing position. 
-func newIndexReaderWithLimitTag(db ethdb.KeyValueReader, state stateIdent, limit uint64) (*indexReaderWithLimitTag, error) { - r, err := newIndexReader(db, state) +func newIndexReaderWithLimitTag(db ethdb.KeyValueReader, state stateIdent, limit uint64, bitmapSize int) (*indexReaderWithLimitTag, error) { + r, err := newIndexReader(db, state, bitmapSize) if err != nil { return nil, err } @@ -252,7 +252,7 @@ func (r *historyReader) read(state stateIdentQuery, stateID uint64, lastID uint6 // state retrieval ir, ok := r.readers[state.String()] if !ok { - ir, err = newIndexReaderWithLimitTag(r.disk, state.stateIdent, metadata.Last) + ir, err = newIndexReaderWithLimitTag(r.disk, state.stateIdent, metadata.Last, 0) if err != nil { return nil, err } diff --git a/triedb/pathdb/history_trienode.go b/triedb/pathdb/history_trienode.go index 1004106af9..6c0c0fe8cc 100644 --- a/triedb/pathdb/history_trienode.go +++ b/triedb/pathdb/history_trienode.go @@ -159,17 +159,6 @@ func newTrienodeHistory(root common.Hash, parent common.Hash, block uint64, node } } -// sharedLen returns the length of the common prefix shared by a and b. -func sharedLen(a, b []byte) int { - n := min(len(a), len(b)) - for i := range n { - if a[i] != b[i] { - return i - } - } - return n -} - // typ implements the history interface, returning the historical data type held. 
func (h *trienodeHistory) typ() historyType { return typeTrienodeHistory @@ -219,7 +208,7 @@ func (h *trienodeHistory) encode() ([]byte, []byte, []byte, error) { restarts = append(restarts, internalValOffset) prefixLen = 0 } else { - prefixLen = sharedLen(prevKey, key) + prefixLen = commonPrefixLen(prevKey, key) } value := h.nodes[owner][path] diff --git a/triedb/pathdb/history_trienode_test.go b/triedb/pathdb/history_trienode_test.go index be4740a904..0c0422f00f 100644 --- a/triedb/pathdb/history_trienode_test.go +++ b/triedb/pathdb/history_trienode_test.go @@ -580,8 +580,8 @@ func TestTrienodeHistoryReaderIterator(t *testing.T) { } } -// TestSharedLen tests the sharedLen helper function -func TestSharedLen(t *testing.T) { +// TestCommonPrefixLen tests the commonPrefixLen helper function +func TestCommonPrefixLen(t *testing.T) { tests := []struct { a, b []byte expected int @@ -610,13 +610,13 @@ func TestSharedLen(t *testing.T) { } for i, test := range tests { - result := sharedLen(test.a, test.b) + result := commonPrefixLen(test.a, test.b) if result != test.expected { t.Errorf("Test %d: sharedLen(%q, %q) = %d, expected %d", i, test.a, test.b, result, test.expected) } // Test commutativity - resultReverse := sharedLen(test.b, test.a) + resultReverse := commonPrefixLen(test.b, test.a) if result != resultReverse { t.Errorf("Test %d: sharedLen is not commutative: sharedLen(a,b)=%d, sharedLen(b,a)=%d", i, result, resultReverse) diff --git a/triedb/pathdb/history_trienode_utils.go b/triedb/pathdb/history_trienode_utils.go new file mode 100644 index 0000000000..0513343404 --- /dev/null +++ b/triedb/pathdb/history_trienode_utils.go @@ -0,0 +1,83 @@ +// Copyright 2025 The go-ethereum Authors +// This file is part of the go-ethereum library. 
+// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package pathdb + +import ( + "encoding/binary" + "fmt" + "slices" +) + +// commonPrefixLen returns the length of the common prefix shared by a and b. +func commonPrefixLen(a, b []byte) int { + n := min(len(a), len(b)) + for i := range n { + if a[i] != b[i] { + return i + } + } + return n +} + +// encodeIDs sorts the given list of uint16 IDs and encodes them into a +// compact byte slice using variable-length unsigned integer encoding. +func encodeIDs(ids []uint16) []byte { + slices.Sort(ids) + buf := make([]byte, 0, len(ids)) + for _, id := range ids { + buf = binary.AppendUvarint(buf, uint64(id)) + } + return buf +} + +// decodeIDs decodes a sequence of variable-length encoded uint16 IDs from the +// given byte slice and returns them as a slice. +// +// Returns an error if the input buffer does not contain a complete Uvarint value. +func decodeIDs(buf []byte) ([]uint16, error) { + var res []uint16 + for len(buf) > 0 { + id, n := binary.Uvarint(buf) + if n <= 0 { + return nil, fmt.Errorf("too short for decoding node id, %v", buf) + } + buf = buf[n:] + res = append(res, uint16(id)) + } + return res, nil +} + +// isAncestor reports whether node x is the ancestor of node y. 
+func isAncestor(x, y uint16) bool { + for y > x { + y = (y - 1) / 16 // parentID(y) = (y - 1) / 16 + if y == x { + return true + } + } + return false +} + +// isBitSet reports whether the bit at `index` in the byte slice `b` is set. +func isBitSet(b []byte, index int) bool { + return b[index/8]&(1<<(7-index%8)) != 0 +} + +// setBit sets the bit at `index` in the byte slice `b` to 1. +func setBit(b []byte, index int) { + b[index/8] |= 1 << (7 - index%8) +} diff --git a/triedb/pathdb/history_trienode_utils_test.go b/triedb/pathdb/history_trienode_utils_test.go new file mode 100644 index 0000000000..17eabb2a98 --- /dev/null +++ b/triedb/pathdb/history_trienode_utils_test.go @@ -0,0 +1,81 @@ +// Copyright 2025 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . 
+ +package pathdb + +import ( + "bytes" + "testing" +) + +func TestIsAncestor(t *testing.T) { + suites := []struct { + x, y uint16 + want bool + }{ + {0, 1, true}, + {0, 16, true}, + {0, 17, true}, + {0, 272, true}, + + {1, 0, false}, + {1, 2, false}, + {1, 17, true}, + {1, 18, true}, + {17, 273, true}, + {1, 1, false}, + } + for _, tc := range suites { + result := isAncestor(tc.x, tc.y) + if result != tc.want { + t.Fatalf("isAncestor(%d, %d) = %v, want %v", tc.x, tc.y, result, tc.want) + } + } +} + +func TestBitmapSet(t *testing.T) { + suites := []struct { + index int + expect []byte + }{ + { + 0, []byte{0b10000000, 0x0}, + }, + { + 1, []byte{0b01000000, 0x0}, + }, + { + 7, []byte{0b00000001, 0x0}, + }, + { + 8, []byte{0b00000000, 0b10000000}, + }, + { + 15, []byte{0b00000000, 0b00000001}, + }, + } + for _, tc := range suites { + var buf [2]byte + setBit(buf[:], tc.index) + + if !bytes.Equal(buf[:], tc.expect) { + t.Fatalf("bitmap = %v, want %v", buf, tc.expect) + } + if !isBitSet(buf[:], tc.index) { + t.Fatal("bit is not set") + } + } +}