triedb/pathdb: improve trienode reader for searching (#33681)
Some checks are pending
/ Docker Image (push) Waiting to run
/ Linux Build (push) Waiting to run
/ Linux Build (arm) (push) Waiting to run
/ Keeper Build (push) Waiting to run
/ Windows Build (push) Waiting to run

This PR optimizes the historical trie node reader by reworking how data
is accessed and memory is managed, reducing allocation overhead 
significantly.

Specifically:

- Instead of decoding an entire history object to locate a specific trie node, 
   the reader now searches directly within the history.

- In addition, slices are now pre-allocated, which avoids a significant amount of unnecessary copying.
This commit is contained in:
rjl493456442 2026-01-27 20:05:35 +08:00 committed by GitHub
parent e250836973
commit 181a3ae9e0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 395 additions and 247 deletions

View file

@ -42,7 +42,7 @@ func parseIndex(blob []byte, bitmapSize int) ([]*indexBlockDesc, error) {
} }
var ( var (
lastID uint32 lastID uint32
descList []*indexBlockDesc descList = make([]*indexBlockDesc, 0, len(blob)/size)
) )
for i := 0; i < len(blob)/size; i++ { for i := 0; i < len(blob)/size; i++ {
var desc indexBlockDesc var desc indexBlockDesc

View file

@ -65,13 +65,13 @@ func (d *indexBlockDesc) encode() []byte {
return buf[:] return buf[:]
} }
// decode unpacks index block descriptor from byte stream. It's safe to mutate // decode unpacks index block descriptor from byte stream. It's unsafe to mutate
// the provided byte stream after the function call. // the provided byte stream after the function call.
func (d *indexBlockDesc) decode(blob []byte) { func (d *indexBlockDesc) decode(blob []byte) {
d.max = binary.BigEndian.Uint64(blob[:8]) d.max = binary.BigEndian.Uint64(blob[:8])
d.entries = binary.BigEndian.Uint16(blob[8:10]) d.entries = binary.BigEndian.Uint16(blob[8:10])
d.id = binary.BigEndian.Uint32(blob[10:14]) d.id = binary.BigEndian.Uint32(blob[10:14])
d.extBitmap = bytes.Clone(blob[indexBlockDescSize:]) d.extBitmap = blob[indexBlockDescSize:] // no-deep copy!
} }
// copy returns a deep-copied object. // copy returns a deep-copied object.

View file

@ -284,11 +284,8 @@ func newTrienodeReader(disk ethdb.KeyValueReader, freezer ethdb.AncientReader, r
} }
// readTrienode retrieves the trienode data from the specified trienode history. // readTrienode retrieves the trienode data from the specified trienode history.
func (r *trienodeReader) readTrienode(addrHash common.Hash, path string, historyID uint64) ([]byte, error) { func (r *trienodeReader) readTrienode(addrHash common.Hash, path string, historyID uint64) ([]byte, bool, error) {
tr, err := newTrienodeHistoryReader(historyID, r.freezer) tr := newTrienodeHistoryReader(historyID, r.freezer)
if err != nil {
return nil, err
}
return tr.read(addrHash, path) return tr.read(addrHash, path)
} }
@ -355,15 +352,19 @@ func (r *trienodeReader) readOptimized(state stateIdent, it HistoryIndexIterator
seq += 1 seq += 1
eg.Go(func() error { eg.Go(func() error {
data, found, err := r.readTrienode(state.addressHash, state.path, id)
if err != nil {
term.Store(true)
return err
}
// In optimistic readahead mode, it is theoretically possible to encounter a // In optimistic readahead mode, it is theoretically possible to encounter a
// NotFound error, where the trie node does not actually exist and the iterator // NotFound error, where the trie node does not actually exist and the iterator
// reports a false-positive mutation record. Terminate the iterator if so, as // reports a false-positive mutation record. Terminate the iterator if so, as
// all the necessary data (checkpoints and all diffs) required has already been // all the necessary data (checkpoints and all diffs) required has already been
// fetching. // fetching.
data, err := r.readTrienode(state.addressHash, state.path, id) if !found {
if err != nil {
term.Store(true) term.Store(true)
log.Debug("Failed to read the trienode", "err", err) log.Debug("Failed to read the trienode")
return nil return nil
} }
full, _, err := decodeNodeFull(data) full, _, err := decodeNodeFull(data)

View file

@ -46,7 +46,10 @@ import (
// - block number (8 bytes) // - block number (8 bytes)
// //
// - a lexicographically sorted list of trie IDs // - a lexicographically sorted list of trie IDs
// - the corresponding offsets into the key and value sections for each trie data chunk // - the corresponding offsets into the key and value sections for each trie
// data chunk. The offsets refer to the end position of each chunk, with
// the assumption that the key and value sections for the first data chunk
// start at offset 0.
// //
// Although some fields (e.g., parent state root, block number) are duplicated // Although some fields (e.g., parent state root, block number) are duplicated
// between the state history and the trienode history, these two histories // between the state history and the trienode history, these two histories
@ -55,19 +58,16 @@ import (
// //
// # Key section // # Key section
// The key section stores trie node keys (paths) in a compressed format. // The key section stores trie node keys (paths) in a compressed format.
// It also contains relative offsets into the value section for resolving // It also contains relative offsets into the value section for locating
// the corresponding trie node data. Note that these offsets are relative // the corresponding trie node data. These offsets are relative to the
// to the data chunk for the trie; the chunk offset must be added to obtain // beginning of the trie data chunk, the chunk's base offset must be added
// the absolute position. // to obtain the absolute position in the value section.
// //
// # Value section // # Value section
// The value section is a concatenated byte stream of all trie node data. // The value section is a concatenated byte stream of all trie node data.
// Each trie node can be retrieved using the offset and length specified // Each trie node can be retrieved using the offset and length specified
// by its index entry. // by its index entry.
// //
// The header and key sections are sufficient for locating a trie node,
// while a partial read of the value section is enough to retrieve its data.
// Header section: // Header section:
// //
// +----------+------------------+---------------------+---------------------+-------+------------------+---------------------+---------------------| // +----------+------------------+---------------------+---------------------+-------+------------------+---------------------+---------------------|
@ -89,9 +89,9 @@ import (
// //
// +---- key len ----+ // +---- key len ----+
// / \ // / \
// +-------+---------+-----------+---------+-----------------------+-----------------+ // +-------+---------+-----------+---------+-----------------------+-----------------------+
// | shared (varint) | not shared (varint) | value length (varlen) | key (varlen) | // | shared (varint) | not shared (varint) | value length (varlen) | unshared key (varlen) |
// +-----------------+---------------------+-----------------------+-----------------+ // +-----------------+---------------------+-----------------------+-----------------------+
// //
// trailer: // trailer:
// //
@ -101,9 +101,9 @@ import (
// | restart_1 key offset | restart_1 value offset | ... | restart number (4-bytes) | // | restart_1 key offset | restart_1 value offset | ... | restart number (4-bytes) |
// +----------------------+------------------------+-----+--------------------------+ // +----------------------+------------------------+-----+--------------------------+
// //
// Note: Both the key offset and the value offset are relative to the start of // Note: Both the key offset and the value offset are relative to the beginning
// the trie data chunk. To obtain the absolute offset, add the offset of the // of the trie data chunk. The chunk's base offset must be added to obtain the
// trie data chunk itself. // absolute position in the value section.
// //
// Value section: // Value section:
// //
@ -140,9 +140,12 @@ type trienodeHistory struct {
// newTrienodeHistory constructs a trienode history with the provided trie nodes. // newTrienodeHistory constructs a trienode history with the provided trie nodes.
func newTrienodeHistory(root common.Hash, parent common.Hash, block uint64, nodes map[common.Hash]map[string][]byte) *trienodeHistory { func newTrienodeHistory(root common.Hash, parent common.Hash, block uint64, nodes map[common.Hash]map[string][]byte) *trienodeHistory {
nodeList := make(map[common.Hash][]string) nodeList := make(map[common.Hash][]string, len(nodes))
for owner, subset := range nodes { for owner, subset := range nodes {
keys := sort.StringSlice(slices.Collect(maps.Keys(subset))) keys := make(sort.StringSlice, 0, len(subset))
for k := range subset {
keys = append(keys, k)
}
keys.Sort() keys.Sort()
nodeList[owner] = keys nodeList[owner] = keys
} }
@ -222,11 +225,16 @@ func (h *trienodeHistory) encode() ([]byte, []byte, []byte, error) {
restarts []uint32 restarts []uint32
prefixLen int prefixLen int
internalKeyOffset uint32 // key offset for the trie internally internalKeyOffset uint32 // key offset within the trie data internally
internalValOffset uint32 // value offset for the trie internally internalValOffset uint32 // value offset within the trie data internally
) )
for i, path := range h.nodeList[owner] { for i, path := range h.nodeList[owner] {
key := []byte(path) key := []byte(path)
// Track the internal key and value offsets at the beginning of the
// restart section. The absolute offsets within the key and value
// sections should first include the offset of the trie chunk itself
// stored in the header section.
if i%trienodeDataBlockRestartLen == 0 { if i%trienodeDataBlockRestartLen == 0 {
restarts = append(restarts, internalKeyOffset) restarts = append(restarts, internalKeyOffset)
restarts = append(restarts, internalValOffset) restarts = append(restarts, internalValOffset)
@ -271,18 +279,13 @@ func (h *trienodeHistory) encode() ([]byte, []byte, []byte, error) {
} }
// Fill the header section with the offsets of the key and value sections. // Fill the header section with the offsets of the key and value sections.
// Note that the key/value offsets are intentionally tracked *after* encoding // Note that key/value offsets are intentionally recorded *after* encoding
// them into their respective sections, ensuring each offset refers to the end // into their respective sections, so each offset refers to an end position.
// position. For n trie chunks, n offset pairs are sufficient to uniquely locate // For n trie chunks, n offset pairs are sufficient to uniquely locate each
// the corresponding data. // chunk's data. For example, [0, offset_0] defines the range of trie chunk 0,
headerSection.Write(owner.Bytes()) // 32 bytes // while [offset_{n-2}, offset_{n-1}] defines the range of trie chunk n-1.
binary.Write(&headerSection, binary.BigEndian, uint32(keySection.Len())) // 4 bytes headerSection.Write(owner.Bytes()) // 32 bytes
binary.Write(&headerSection, binary.BigEndian, uint32(keySection.Len())) // 4 bytes
// The offset to the value section is theoretically unnecessary, since the
// individual value offset is already tracked in the key section. However,
// we still keep it here for two reasons:
// - It's cheap to store (only 4 bytes for each trie).
// - It can be useful for decoding the trie data when key is not required (e.g., in hash mode).
binary.Write(&headerSection, binary.BigEndian, uint32(valueSection.Len())) // 4 bytes binary.Write(&headerSection, binary.BigEndian, uint32(valueSection.Len())) // 4 bytes
} }
return headerSection.Bytes(), keySection.Bytes(), valueSection.Bytes(), nil return headerSection.Bytes(), keySection.Bytes(), valueSection.Bytes(), nil
@ -345,32 +348,68 @@ func decodeHeader(data []byte) (*trienodeMetadata, []common.Hash, []uint32, []ui
}, owners, keyOffsets, valOffsets, nil }, owners, keyOffsets, valOffsets, nil
} }
func decodeSingle(keySection []byte, onValue func([]byte, int, int) error) ([]string, error) { // decodeKeyEntry resolves a single entry from the key section starting from
// the specified offset.
//
// It returns, in order: the length of the key prefix shared with the previous
// entry, the length of the associated value, the unshared key bytes (a
// sub-slice of keySection, not a copy) and the total number of bytes consumed
// by the entry. An error is returned if the offset lies outside the section,
// if any varint is corrupted, if a declared size cannot be represented as an
// int, or if the unshared key runs past the end of the section.
func decodeKeyEntry(keySection []byte, offset int) (uint64, uint64, []byte, int, error) {
	// Reject an offset outside of the section up front, slicing with it
	// below would otherwise panic on corrupted input.
	if offset < 0 || offset > len(keySection) {
		return 0, 0, nil, 0, fmt.Errorf("key entry offset out of range, off: %d, section size: %d", offset, len(keySection))
	}
	var byteRead int

	// Resolve the length of shared key
	nShared, nn := binary.Uvarint(keySection[offset:]) // key length shared (varint)
	if nn <= 0 {
		return 0, 0, nil, 0, fmt.Errorf("corrupted varint encoding for nShared at offset %d", offset)
	}
	byteRead += nn

	// Resolve the length of unshared key
	nUnshared, nn := binary.Uvarint(keySection[offset+byteRead:]) // key length not shared (varint)
	if nn <= 0 {
		return 0, 0, nil, 0, fmt.Errorf("corrupted varint encoding for nUnshared at offset %d", offset+byteRead)
	}
	byteRead += nn

	// Resolve the length of value
	nValue, nn := binary.Uvarint(keySection[offset+byteRead:]) // value length (varint)
	if nn <= 0 {
		return 0, 0, nil, 0, fmt.Errorf("corrupted varint encoding for nValue at offset %d", offset+byteRead)
	}
	byteRead += nn

	// Validate that the sizes fit in an int on every platform. The bound has
	// to be MaxInt32 rather than MaxUint32: on 32-bit systems any larger value
	// turns negative once converted to int and would slip past the bounds
	// check below.
	if nShared > uint64(math.MaxInt32) || nUnshared > uint64(math.MaxInt32) || nValue > uint64(math.MaxInt32) {
		return 0, 0, nil, 0, errors.New("key/value size too large")
	}

	// Resolve the unshared key. The check is expressed as a subtraction so it
	// cannot overflow; start never exceeds len(keySection) because all the
	// varints above were read from within the section.
	start := offset + byteRead
	if int(nUnshared) > len(keySection)-start {
		return 0, 0, nil, 0, fmt.Errorf("key length too long, unshared key length: %d, off: %d, section size: %d", nUnshared, start, len(keySection))
	}
	unsharedKey := keySection[start : start+int(nUnshared)]
	byteRead += int(nUnshared)

	return nShared, nValue, unsharedKey, byteRead, nil
}
// decodeRestartTrailer resolves all the offsets recorded at the trailer.
func decodeRestartTrailer(keySection []byte) ([]uint32, []uint32, int, error) {
var ( var (
prevKey []byte
items int
keyOffsets []uint32 keyOffsets []uint32
valOffsets []uint32 valOffsets []uint32
keyOff int // the key offset within the single trie data
valOff int // the value offset within the single trie data
keys []string
) )
// Decode the number of restart section // Decode the number of restart section
if len(keySection) < 4 { if len(keySection) < 4 {
return nil, fmt.Errorf("key section too short, size: %d", len(keySection)) return nil, nil, 0, fmt.Errorf("key section too short, size: %d", len(keySection))
} }
nRestarts := binary.BigEndian.Uint32(keySection[len(keySection)-4:]) nRestarts := binary.BigEndian.Uint32(keySection[len(keySection)-4:])
// Decode the trailer
if len(keySection) < int(8*nRestarts)+4 { if len(keySection) < int(8*nRestarts)+4 {
return nil, fmt.Errorf("key section too short, restarts: %d, size: %d", nRestarts, len(keySection)) return nil, nil, 0, fmt.Errorf("key section too short, restarts: %d, size: %d", nRestarts, len(keySection))
} }
for i := range int(nRestarts) { for i := range int(nRestarts) {
o := len(keySection) - 4 - (int(nRestarts)-i)*8 o := len(keySection) - 4 - (int(nRestarts)-i)*8
keyOffset := binary.BigEndian.Uint32(keySection[o : o+4]) keyOffset := binary.BigEndian.Uint32(keySection[o : o+4])
if i != 0 && keyOffset <= keyOffsets[i-1] { if i != 0 && keyOffset <= keyOffsets[i-1] {
return nil, fmt.Errorf("key offset is out of order, prev: %v, cur: %v", keyOffsets[i-1], keyOffset) return nil, nil, 0, fmt.Errorf("key offset is out of order, prev: %v, cur: %v", keyOffsets[i-1], keyOffset)
} }
keyOffsets = append(keyOffsets, keyOffset) keyOffsets = append(keyOffsets, keyOffset)
@ -378,99 +417,118 @@ func decodeSingle(keySection []byte, onValue func([]byte, int, int) error) ([]st
// section have zero-size value. // section have zero-size value.
valOffset := binary.BigEndian.Uint32(keySection[o+4 : o+8]) valOffset := binary.BigEndian.Uint32(keySection[o+4 : o+8])
if i != 0 && valOffset < valOffsets[i-1] { if i != 0 && valOffset < valOffsets[i-1] {
return nil, fmt.Errorf("value offset is out of order, prev: %v, cur: %v", valOffsets[i-1], valOffset) return nil, nil, 0, fmt.Errorf("value offset is out of order, prev: %v, cur: %v", valOffsets[i-1], valOffset)
} }
valOffsets = append(valOffsets, valOffset) valOffsets = append(valOffsets, valOffset)
} }
keyLimit := len(keySection) - 4 - int(nRestarts)*8 keyLimit := len(keySection) - 4 - int(nRestarts)*8 // End of key data
return keyOffsets, valOffsets, keyLimit, nil
}
// decodeRestartSection resolves all entries in a restart section. The keyData
// contains the encoded keys for the section.
//
// onValue is the callback invoked for each resolved entry. The start and
// limit are value offsets relative to the beginning of the restart section;
// the caller is responsible for adding the base value offset of the restart
// section. The callback returns true as its first result to stop resolving
// the remaining entries.
func decodeRestartSection(keyData []byte, onValue func(key []byte, start int, limit int) (bool, error)) error {
var (
prevKey []byte
items int
keyOff int // the key offset within the single trie data
valOff int // the value offset within the single trie data
)
// Decode data // Decode data
for keyOff < keyLimit { for keyOff < len(keyData) {
// Validate the key and value offsets within the single trie data chunk nShared, nValue, unsharedKey, nn, err := decodeKeyEntry(keyData, keyOff)
if items%trienodeDataBlockRestartLen == 0 { if err != nil {
restartIndex := items / trienodeDataBlockRestartLen return err
if restartIndex >= len(keyOffsets) {
return nil, fmt.Errorf("restart index out of range: %d, available restarts: %d", restartIndex, len(keyOffsets))
}
if keyOff != int(keyOffsets[restartIndex]) {
return nil, fmt.Errorf("key offset is not matched, recorded: %d, want: %d", keyOffsets[restartIndex], keyOff)
}
if valOff != int(valOffsets[restartIndex]) {
return nil, fmt.Errorf("value offset is not matched, recorded: %d, want: %d", valOffsets[restartIndex], valOff)
}
}
// Resolve the entry from key section
nShared, nn := binary.Uvarint(keySection[keyOff:]) // key length shared (varint)
if nn <= 0 {
return nil, fmt.Errorf("corrupted varint encoding for nShared at offset %d", keyOff)
} }
keyOff += nn keyOff += nn
nUnshared, nn := binary.Uvarint(keySection[keyOff:]) // key length not shared (varint)
if nn <= 0 {
return nil, fmt.Errorf("corrupted varint encoding for nUnshared at offset %d", keyOff)
}
keyOff += nn
nValue, nn := binary.Uvarint(keySection[keyOff:]) // value length (varint)
if nn <= 0 {
return nil, fmt.Errorf("corrupted varint encoding for nValue at offset %d", keyOff)
}
keyOff += nn
// Validate that the values can fit in an int to prevent overflow on 32-bit systems
if nShared > uint64(math.MaxUint32) || nUnshared > uint64(math.MaxUint32) || nValue > uint64(math.MaxUint32) {
return nil, errors.New("key size too large")
}
// Resolve unshared key
if keyOff+int(nUnshared) > len(keySection) {
return nil, fmt.Errorf("key length too long, unshared key length: %d, off: %d, section size: %d", nUnshared, keyOff, len(keySection))
}
unsharedKey := keySection[keyOff : keyOff+int(nUnshared)]
keyOff += int(nUnshared)
// Assemble the full key // Assemble the full key
var key []byte var key []byte
if items%trienodeDataBlockRestartLen == 0 { if items%trienodeDataBlockRestartLen == 0 {
if nShared != 0 { if nShared != 0 {
return nil, fmt.Errorf("unexpected non-zero shared key prefix: %d", nShared) return fmt.Errorf("unexpected non-zero shared key prefix: %d", nShared)
} }
key = unsharedKey key = unsharedKey
} else { } else {
// TODO(rjl493456442) mitigate the allocation pressure.
if int(nShared) > len(prevKey) { if int(nShared) > len(prevKey) {
return nil, fmt.Errorf("unexpected shared key prefix: %d, prefix key length: %d", nShared, len(prevKey)) return fmt.Errorf("unexpected shared key prefix: %d, prefix key length: %d", nShared, len(prevKey))
} }
key = append([]byte{}, prevKey[:nShared]...) key = make([]byte, int(nShared)+len(unsharedKey))
key = append(key, unsharedKey...) copy(key[:nShared], prevKey[:nShared])
copy(key[nShared:], unsharedKey)
} }
if items != 0 && bytes.Compare(prevKey, key) >= 0 { if items != 0 && bytes.Compare(prevKey, key) >= 0 {
return nil, fmt.Errorf("trienode paths are out of order, prev: %v, cur: %v", prevKey, key) return fmt.Errorf("trienode paths are out of order, prev: %v, cur: %v", prevKey, key)
} }
prevKey = key prevKey = key
// Resolve value valEnd := valOff + int(nValue)
if onValue != nil { abort, err := onValue(key, valOff, valEnd)
if err := onValue(key, valOff, valOff+int(nValue)); err != nil { if err != nil {
return nil, err return err
}
} }
valOff += int(nValue) if abort {
return nil
}
valOff = valEnd
items++ items++
keys = append(keys, string(key))
} }
if keyOff != keyLimit { if keyOff != len(keyData) {
return nil, fmt.Errorf("excessive key data after decoding, offset: %d, size: %d", keyOff, keyLimit) return fmt.Errorf("excessive key data after decoding, offset: %d, size: %d", keyOff, len(keyData))
} }
return keys, nil return nil
}
// decodeSingle resolves all entries of a single trie chunk from its key
// section. onValue is the callback invoked for each resolved entry; start
// and limit are value offsets relative to the beginning of the trie chunk,
// so the caller must add the base value offset of the chunk itself.
func decodeSingle(keySection []byte, onValue func([]byte, int, int) error) error {
keyOffsets, valOffsets, keyLimit, err := decodeRestartTrailer(keySection)
if err != nil {
return err
}
for i := 0; i < len(keyOffsets); i++ {
var keyData []byte
if i == len(keyOffsets)-1 {
keyData = keySection[keyOffsets[i]:keyLimit]
} else {
keyData = keySection[keyOffsets[i]:keyOffsets[i+1]]
}
err := decodeRestartSection(keyData, func(key []byte, start int, limit int) (bool, error) {
valStart := int(valOffsets[i]) + start
valLimit := int(valOffsets[i]) + limit
// Possible in tests
if onValue == nil {
return false, nil
}
if err := onValue(key, valStart, valLimit); err != nil {
return false, err
}
return false, nil // abort=false
})
if err != nil {
return err
}
}
return nil
} }
func decodeSingleWithValue(keySection []byte, valueSection []byte) ([]string, map[string][]byte, error) { func decodeSingleWithValue(keySection []byte, valueSection []byte) ([]string, map[string][]byte, error) {
var ( var (
offset int offset int
nodes = make(map[string][]byte) estimated = len(keySection) / 8
nodes = make(map[string][]byte, estimated)
paths = make([]string, 0, estimated)
) )
paths, err := decodeSingle(keySection, func(key []byte, start int, limit int) error { err := decodeSingle(keySection, func(key []byte, start int, limit int) error {
if start != offset { if start != offset {
return fmt.Errorf("gapped value section offset: %d, want: %d", start, offset) return fmt.Errorf("gapped value section offset: %d, want: %d", start, offset)
} }
@ -481,7 +539,9 @@ func decodeSingleWithValue(keySection []byte, valueSection []byte) ([]string, ma
if start > len(valueSection) || limit > len(valueSection) { if start > len(valueSection) || limit > len(valueSection) {
return fmt.Errorf("value section out of range: start: %d, limit: %d, size: %d", start, limit, len(valueSection)) return fmt.Errorf("value section out of range: start: %d, limit: %d, size: %d", start, limit, len(valueSection))
} }
nodes[string(key)] = valueSection[start:limit] strkey := string(key)
paths = append(paths, strkey)
nodes[strkey] = valueSection[start:limit]
offset = limit offset = limit
return nil return nil
@ -507,7 +567,8 @@ func (h *trienodeHistory) decode(header []byte, keySection []byte, valueSection
h.nodes = make(map[common.Hash]map[string][]byte) h.nodes = make(map[common.Hash]map[string][]byte)
for i := range len(owners) { for i := range len(owners) {
// Resolve the boundary of key section // Resolve the boundary of the key section, each offset referring
// to the end position of this trie chunk.
var keyStart, keyLimit uint32 var keyStart, keyLimit uint32
if i != 0 { if i != 0 {
keyStart = keyOffsets[i-1] keyStart = keyOffsets[i-1]
@ -517,7 +578,8 @@ func (h *trienodeHistory) decode(header []byte, keySection []byte, valueSection
return fmt.Errorf("invalid key offsets: keyStart: %d, keyLimit: %d, size: %d", keyStart, keyLimit, len(keySection)) return fmt.Errorf("invalid key offsets: keyStart: %d, keyLimit: %d, size: %d", keyStart, keyLimit, len(keySection))
} }
// Resolve the boundary of value section // Resolve the boundary of the value section, each offset referring
// to the end position of this trie chunk.
var valStart, valLimit uint32 var valStart, valLimit uint32
if i != 0 { if i != 0 {
valStart = valueOffsets[i-1] valStart = valueOffsets[i-1]
@ -547,133 +609,175 @@ func (ir iRange) len() uint32 {
return ir.limit - ir.start return ir.limit - ir.start
} }
// singleTrienodeHistoryReader provides read access to a single trie within the
// trienode history. It stores an offset to the trie's position in the history,
// along with a set of per-node offsets that can be resolved on demand.
type singleTrienodeHistoryReader struct { type singleTrienodeHistoryReader struct {
id uint64 id uint64
reader ethdb.AncientReader reader ethdb.AncientReader
valueRange iRange // value range within the global value section keyData []byte
valueInternalOffsets map[string]iRange // value offset within the single trie data valueRange iRange
} }
// TODO(rjl493456442): This function performs a large number of allocations.
// Given the large data size, a byte pool could be used to mitigate this.
func newSingleTrienodeHistoryReader(id uint64, reader ethdb.AncientReader, keyRange iRange, valueRange iRange) (*singleTrienodeHistoryReader, error) { func newSingleTrienodeHistoryReader(id uint64, reader ethdb.AncientReader, keyRange iRange, valueRange iRange) (*singleTrienodeHistoryReader, error) {
// TODO(rjl493456442) the data size is known in advance, allocating the
// dedicated byte slices from the pool.
keyData, err := rawdb.ReadTrienodeHistoryKeySection(reader, id, uint64(keyRange.start), uint64(keyRange.len())) keyData, err := rawdb.ReadTrienodeHistoryKeySection(reader, id, uint64(keyRange.start), uint64(keyRange.len()))
if err != nil { if err != nil {
return nil, err return nil, err
} }
valueOffsets := make(map[string]iRange)
_, err = decodeSingle(keyData, func(key []byte, start int, limit int) error {
valueOffsets[string(key)] = iRange{
start: uint32(start),
limit: uint32(limit),
}
return nil
})
if err != nil {
return nil, err
}
return &singleTrienodeHistoryReader{ return &singleTrienodeHistoryReader{
id: id, id: id,
reader: reader, reader: reader,
valueRange: valueRange, keyData: keyData,
valueInternalOffsets: valueOffsets, valueRange: valueRange,
}, nil }, nil
} }
// read retrieves the trie node data with the provided node path. // searchSingle searches for a specific trie node identified by the provided
func (sr *singleTrienodeHistoryReader) read(path string) ([]byte, error) { // key within a single trie node chunk.
offset, exists := sr.valueInternalOffsets[path] //
if !exists { // It returns the node value's offset range (start and limit) within the
return nil, fmt.Errorf("trienode %v not found", []byte(path)) // trie node data. The returned boolean is false if the node cannot be found.
func (sr *singleTrienodeHistoryReader) searchSingle(key []byte) (int, int, bool, error) {
keyOffsets, valOffsets, keyLimit, err := decodeRestartTrailer(sr.keyData)
if err != nil {
return 0, 0, false, err
} }
return rawdb.ReadTrienodeHistoryValueSection(sr.reader, sr.id, uint64(sr.valueRange.start+offset.start), uint64(offset.len())) // Binary search against the boundary keys for each restart section
var (
boundFind bool
boundValueLen uint64
)
pos := sort.Search(len(keyOffsets), func(i int) bool {
_, nValue, dkey, _, derr := decodeKeyEntry(sr.keyData[keyOffsets[i]:], 0)
if derr != nil {
err = derr
return false
}
n := bytes.Compare(key, dkey)
if n == 0 {
boundFind = true
boundValueLen = nValue
}
return n <= 0
})
if err != nil {
return 0, 0, false, err
}
// The node is found as the boundary of restart section
if boundFind {
start := valOffsets[pos]
limit := valOffsets[pos] + uint32(boundValueLen)
return int(start), int(limit), true, nil
}
// The node is not found as all others have larger key than the target
if pos == 0 {
return 0, 0, false, nil
}
// Search the target node within the restart section
var keyData []byte
if pos == len(keyOffsets) {
keyData = sr.keyData[keyOffsets[pos-1]:keyLimit] // last section
} else {
keyData = sr.keyData[keyOffsets[pos-1]:keyOffsets[pos]] // non-last section
}
var (
nStart int
nLimit int
found bool
)
err = decodeRestartSection(keyData, func(ikey []byte, start, limit int) (bool, error) {
if bytes.Equal(key, ikey) {
nStart = int(valOffsets[pos-1]) + start
nLimit = int(valOffsets[pos-1]) + limit
found = true
return true, nil // abort = true
}
return false, nil // abort = false
})
if err != nil {
return 0, 0, false, err
}
if !found {
return 0, 0, false, nil
}
return nStart, nLimit, true, nil
}
// read retrieves the trie node data with the provided node path.
func (sr *singleTrienodeHistoryReader) read(key []byte) ([]byte, bool, error) {
start, limit, found, err := sr.searchSingle(key)
if err != nil {
return nil, false, err
}
if !found {
return nil, false, nil
}
valStart := uint64(start) + uint64(sr.valueRange.start)
valLen := uint64(limit - start)
value, err := rawdb.ReadTrienodeHistoryValueSection(sr.reader, sr.id, valStart, valLen)
if err != nil {
return nil, false, err
}
return value, true, nil
} }
// trienodeHistoryReader provides read access to node data in the trie node history. // trienodeHistoryReader provides read access to node data in the trie node history.
// It resolves data from the underlying ancient store only when needed, minimizing // It resolves data from the underlying ancient store only when needed, minimizing
// I/O overhead. // I/O overhead.
type trienodeHistoryReader struct { type trienodeHistoryReader struct {
id uint64 // ID of the associated trienode history id uint64 // ID of the associated trienode history
reader ethdb.AncientReader // Database reader of ancient store reader ethdb.AncientReader // Database reader of ancient store
keyRanges map[common.Hash]iRange // Key ranges identifying trie chunks
valRanges map[common.Hash]iRange // Value ranges identifying trie chunks
iReaders map[common.Hash]*singleTrienodeHistoryReader // readers for each individual trie chunk
} }
// newTrienodeHistoryReader constructs the reader for specific trienode history. // newTrienodeHistoryReader constructs the reader for specific trienode history.
func newTrienodeHistoryReader(id uint64, reader ethdb.AncientReader) (*trienodeHistoryReader, error) { func newTrienodeHistoryReader(id uint64, reader ethdb.AncientReader) *trienodeHistoryReader {
r := &trienodeHistoryReader{ return &trienodeHistoryReader{
id: id, id: id,
reader: reader, reader: reader,
keyRanges: make(map[common.Hash]iRange),
valRanges: make(map[common.Hash]iRange),
iReaders: make(map[common.Hash]*singleTrienodeHistoryReader),
} }
if err := r.decodeHeader(); err != nil {
return nil, err
}
return r, nil
} }
// decodeHeader decodes the header section of trienode history. // decodeHeader decodes the header section of trienode history.
func (r *trienodeHistoryReader) decodeHeader() error { func (r *trienodeHistoryReader) decodeHeader(owner common.Hash) (iRange, iRange, bool, error) {
header, err := rawdb.ReadTrienodeHistoryHeader(r.reader, r.id) header, err := rawdb.ReadTrienodeHistoryHeader(r.reader, r.id)
if err != nil { if err != nil {
return err return iRange{}, iRange{}, false, err
} }
_, owners, keyOffsets, valOffsets, err := decodeHeader(header) _, owners, keyOffsets, valOffsets, err := decodeHeader(header)
if err != nil { if err != nil {
return err return iRange{}, iRange{}, false, err
} }
for i, owner := range owners { pos := sort.Search(len(owners), func(i int) bool {
// Decode the key range for this trie chunk return owner.Cmp(owners[i]) <= 0
var keyStart uint32 })
if i != 0 { if pos == len(owners) || owners[pos] != owner {
keyStart = keyOffsets[i-1] return iRange{}, iRange{}, false, nil
} }
r.keyRanges[owner] = iRange{ var keyRange iRange
start: keyStart, if pos != 0 {
limit: keyOffsets[i], keyRange.start = keyOffsets[pos-1]
} }
keyRange.limit = keyOffsets[pos]
// Decode the value range for this trie chunk var valRange iRange
var valStart uint32 if pos != 0 {
if i != 0 { valRange.start = valOffsets[pos-1]
valStart = valOffsets[i-1]
}
r.valRanges[owner] = iRange{
start: valStart,
limit: valOffsets[i],
}
} }
return nil valRange.limit = valOffsets[pos]
return keyRange, valRange, true, nil
} }
// read retrieves the trie node data with the provided TrieID and node path. // read retrieves the trie node data with the provided TrieID and node path.
func (r *trienodeHistoryReader) read(owner common.Hash, path string) ([]byte, error) { func (r *trienodeHistoryReader) read(owner common.Hash, path string) ([]byte, bool, error) {
ir, ok := r.iReaders[owner] keyRange, valRange, found, err := r.decodeHeader(owner)
if !ok { if err != nil {
keyRange, exists := r.keyRanges[owner] return nil, false, err
if !exists {
return nil, fmt.Errorf("trie %x is unknown", owner)
}
valRange, exists := r.valRanges[owner]
if !exists {
return nil, fmt.Errorf("trie %x is unknown", owner)
}
var err error
ir, err = newSingleTrienodeHistoryReader(r.id, r.reader, keyRange, valRange)
if err != nil {
return nil, err
}
r.iReaders[owner] = ir
} }
return ir.read(path) if !found {
return nil, false, nil
}
ir, err := newSingleTrienodeHistoryReader(r.id, r.reader, keyRange, valRange)
if err != nil {
return nil, false, err
}
return ir.read([]byte(path))
} }
// writeTrienodeHistory persists the trienode history associated with the given diff layer. // writeTrienodeHistory persists the trienode history associated with the given diff layer.
@ -707,7 +811,6 @@ func writeTrienodeHistory(writer ethdb.AncientWriter, dl *diffLayer, rate uint32
} }
// readTrienodeMetadata resolves the metadata of the specified trienode history. // readTrienodeMetadata resolves the metadata of the specified trienode history.
// nolint:unused
func readTrienodeMetadata(reader ethdb.AncientReader, id uint64) (*trienodeMetadata, error) { func readTrienodeMetadata(reader ethdb.AncientReader, id uint64) (*trienodeMetadata, error) {
header, err := rawdb.ReadTrienodeHistoryHeader(reader, id) header, err := rawdb.ReadTrienodeHistoryHeader(reader, id)
if err != nil { if err != nil {

View file

@ -19,6 +19,7 @@ package pathdb
import ( import (
"bytes" "bytes"
"encoding/binary" "encoding/binary"
"fmt"
"math/rand" "math/rand"
"reflect" "reflect"
"testing" "testing"
@ -137,14 +138,11 @@ func TestTrienodeHistoryReader(t *testing.T) {
} }
} }
for i, h := range hs { for i, h := range hs {
tr, err := newTrienodeHistoryReader(uint64(i+1), freezer) tr := newTrienodeHistoryReader(uint64(i+1), freezer)
if err != nil {
t.Fatalf("Failed to construct the history reader: %v", err)
}
for _, owner := range h.owners { for _, owner := range h.owners {
nodes := h.nodes[owner] nodes := h.nodes[owner]
for key, value := range nodes { for key, value := range nodes {
blob, err := tr.read(owner, key) blob, _, err := tr.read(owner, key)
if err != nil { if err != nil {
t.Fatalf("Failed to read trienode history: %v", err) t.Fatalf("Failed to read trienode history: %v", err)
} }
@ -417,23 +415,23 @@ func TestTrienodeHistoryReaderNonExistentPath(t *testing.T) {
if err := rawdb.WriteTrienodeHistory(freezer, 1, header, keySection, valueSection); err != nil { if err := rawdb.WriteTrienodeHistory(freezer, 1, header, keySection, valueSection); err != nil {
t.Fatalf("Failed to write trienode history: %v", err) t.Fatalf("Failed to write trienode history: %v", err)
} }
tr := newTrienodeHistoryReader(1, freezer)
tr, err := newTrienodeHistoryReader(1, freezer)
if err != nil {
t.Fatalf("Failed to construct history reader: %v", err)
}
// Try to read a non-existent path // Try to read a non-existent path
_, err = tr.read(testrand.Hash(), "nonexistent") var (
if err == nil { err error
t.Fatal("Expected error for non-existent trie owner") found bool
)
_, found, err = tr.read(testrand.Hash(), "nonexistent")
if found || err != nil {
t.Fatal("Expected not found for non-existent trie owner")
} }
// Try to read from existing owner but non-existent path // Try to read from existing owner but non-existent path
owner := h.owners[0] owner := h.owners[0]
_, err = tr.read(owner, "nonexistent-path") _, found, err = tr.read(owner, "nonexistent-path")
if err == nil { if found || err != nil {
t.Fatal("Expected error for non-existent path") t.Fatal("Expected not found for non-existent path")
} }
} }
@ -457,23 +455,19 @@ func TestTrienodeHistoryReaderNilValues(t *testing.T) {
if err := rawdb.WriteTrienodeHistory(freezer, 1, header, keySection, valueSection); err != nil { if err := rawdb.WriteTrienodeHistory(freezer, 1, header, keySection, valueSection); err != nil {
t.Fatalf("Failed to write trienode history: %v", err) t.Fatalf("Failed to write trienode history: %v", err)
} }
tr := newTrienodeHistoryReader(1, freezer)
tr, err := newTrienodeHistoryReader(1, freezer)
if err != nil {
t.Fatalf("Failed to construct history reader: %v", err)
}
// Test reading nil values // Test reading nil values
data1, err := tr.read(owner, "nil1") data1, found, err := tr.read(owner, "nil1")
if err != nil { if err != nil || !found {
t.Fatalf("Failed to read nil value: %v", err) t.Fatalf("Failed to read nil value: %v", err)
} }
if len(data1) != 0 { if len(data1) != 0 {
t.Fatal("Expected nil data for nil value") t.Fatal("Expected nil data for nil value")
} }
data2, err := tr.read(owner, "nil2") data2, found, err := tr.read(owner, "nil2")
if err != nil { if err != nil || !found {
t.Fatalf("Failed to read nil value: %v", err) t.Fatalf("Failed to read nil value: %v", err)
} }
if len(data2) != 0 { if len(data2) != 0 {
@ -481,8 +475,8 @@ func TestTrienodeHistoryReaderNilValues(t *testing.T) {
} }
// Test reading non-nil value // Test reading non-nil value
data3, err := tr.read(owner, "data1") data3, found, err := tr.read(owner, "data1")
if err != nil { if err != nil || !found {
t.Fatalf("Failed to read non-nil value: %v", err) t.Fatalf("Failed to read non-nil value: %v", err)
} }
if !bytes.Equal(data3, []byte("some data")) { if !bytes.Equal(data3, []byte("some data")) {
@ -498,7 +492,7 @@ func TestTrienodeHistoryReaderNilKey(t *testing.T) {
// Add some nil values // Add some nil values
nodes[owner][""] = []byte("some data") nodes[owner][""] = []byte("some data")
nodes[owner]["data1"] = []byte("some data") nodes[owner]["data1"] = []byte("some data1")
h := newTrienodeHistory(common.Hash{}, common.Hash{}, 1, nodes) h := newTrienodeHistory(common.Hash{}, common.Hash{}, 1, nodes)
@ -509,14 +503,10 @@ func TestTrienodeHistoryReaderNilKey(t *testing.T) {
if err := rawdb.WriteTrienodeHistory(freezer, 1, header, keySection, valueSection); err != nil { if err := rawdb.WriteTrienodeHistory(freezer, 1, header, keySection, valueSection); err != nil {
t.Fatalf("Failed to write trienode history: %v", err) t.Fatalf("Failed to write trienode history: %v", err)
} }
tr := newTrienodeHistoryReader(1, freezer)
tr, err := newTrienodeHistoryReader(1, freezer)
if err != nil {
t.Fatalf("Failed to construct history reader: %v", err)
}
// Test reading nil values // Test reading nil values
data1, err := tr.read(owner, "") data1, _, err := tr.read(owner, "")
if err != nil { if err != nil {
t.Fatalf("Failed to read nil value: %v", err) t.Fatalf("Failed to read nil value: %v", err)
} }
@ -525,11 +515,11 @@ func TestTrienodeHistoryReaderNilKey(t *testing.T) {
} }
// Test reading non-nil value // Test reading non-nil value
data2, err := tr.read(owner, "data1") data2, _, err := tr.read(owner, "data1")
if err != nil { if err != nil {
t.Fatalf("Failed to read non-nil value: %v", err) t.Fatalf("Failed to read non-nil value: %v", err)
} }
if !bytes.Equal(data2, []byte("some data")) { if !bytes.Equal(data2, []byte("some data1")) {
t.Fatal("Data mismatch for non-nil key") t.Fatal("Data mismatch for non-nil key")
} }
} }
@ -632,14 +622,14 @@ func TestDecodeSingleCorruptedData(t *testing.T) {
_, keySection, _, _ := h.encode() _, keySection, _, _ := h.encode()
// Test with empty key section // Test with empty key section
_, err := decodeSingle([]byte{}, nil) err := decodeSingle([]byte{}, nil)
if err == nil { if err == nil {
t.Fatal("Expected error for empty key section") t.Fatal("Expected error for empty key section")
} }
// Test with key section too small for trailer // Test with key section too small for trailer
if len(keySection) > 0 { if len(keySection) > 0 {
_, err := decodeSingle(keySection[:3], nil) // Less than 4 bytes for trailer err := decodeSingle(keySection[:3], nil) // Less than 4 bytes for trailer
if err == nil { if err == nil {
t.Fatal("Expected error for key section too small for trailer") t.Fatal("Expected error for key section too small for trailer")
} }
@ -652,7 +642,7 @@ func TestDecodeSingleCorruptedData(t *testing.T) {
for i := range 10 { for i := range 10 {
corrupted[i] = 0xFF corrupted[i] = 0xFF
} }
_, err = decodeSingle(corrupted, nil) err = decodeSingle(corrupted, nil)
if err == nil { if err == nil {
t.Fatal("Expected error for corrupted varint") t.Fatal("Expected error for corrupted varint")
} }
@ -662,7 +652,7 @@ func TestDecodeSingleCorruptedData(t *testing.T) {
copy(corrupted, keySection) copy(corrupted, keySection)
// Set restart count to something too large // Set restart count to something too large
binary.BigEndian.PutUint32(corrupted[len(corrupted)-4:], 10000) binary.BigEndian.PutUint32(corrupted[len(corrupted)-4:], 10000)
_, err = decodeSingle(corrupted, nil) err = decodeSingle(corrupted, nil)
if err == nil { if err == nil {
t.Fatal("Expected error for invalid restart count") t.Fatal("Expected error for invalid restart count")
} }
@ -691,3 +681,57 @@ func testEncodeDecode(t *testing.T, h *trienodeHistory) {
t.Fatal("Trienode content mismatch") t.Fatal("Trienode content mismatch")
} }
} }
// TestSearchSingle exercises point lookups through trienodeHistoryReader on a
// history that holds two tries. Keys that were never written must be reported
// as not found without an error, and every written key must resolve to the
// exact value that was stored.
func TestSearchSingle(t *testing.T) {
	// Populate both tries with the odd decimal keys "1", "3", "5", ... so that
	// any even key is guaranteed to be absent.
	owners := []common.Hash{testrand.Hash(), testrand.Hash()}
	nodes := make(map[common.Hash]map[string][]byte, len(owners))
	for _, owner := range owners {
		nodes[owner] = make(map[string][]byte)
		for i := 0; i < trienodeDataBlockRestartLen*2; i++ {
			nodes[owner][fmt.Sprintf("%d", 2*i+1)] = testrand.Bytes(rand.Intn(5))
		}
	}
	h := newTrienodeHistory(common.Hash{}, common.Hash{}, 1, nodes)

	// Fail fast on setup errors instead of continuing with an unusable freezer
	// or a partially encoded history.
	freezer, err := rawdb.NewTrienodeFreezer(t.TempDir(), false, false)
	if err != nil {
		t.Fatalf("Failed to open trienode freezer: %v", err)
	}
	defer freezer.Close()

	header, keySection, valueSection, err := h.encode()
	if err != nil {
		t.Fatalf("Failed to encode trienode history: %v", err)
	}
	if err := rawdb.WriteTrienodeHistory(freezer, 1, header, keySection, valueSection); err != nil {
		t.Fatalf("Failed to write trienode history: %v", err)
	}
	tr := newTrienodeHistoryReader(1, freezer)

	// Test reading non-existent entries. All of these are even numbers and
	// therefore never inserted above.
	// NOTE(review): the specific values presumably probe positions before,
	// between and after the stored keys — confirm against the key encoding.
	missing := []string{"0", "2", "30", "32", "64", "1000"}
	for _, owner := range owners {
		for _, key := range missing {
			_, found, err := tr.read(owner, key)
			if err != nil {
				t.Fatalf("Unexpected error reading non-existent entry, owner %x, key %q: %v", owner, key, err)
			}
			if found {
				t.Fatalf("Expected non-existent entry, owner %x, key %q", owner, key)
			}
		}
	}

	// Test reading every entry that was written.
	for owner, subnodes := range nodes {
		for key, value := range subnodes {
			got, found, err := tr.read(owner, key)
			if err != nil {
				t.Fatalf("Failed to read trienode, owner %x, key %q: %v", owner, key, err)
			}
			if !found {
				t.Fatalf("Trienode not found, owner %x, key %q", owner, key)
			}
			if !bytes.Equal(got, value) {
				t.Fatalf("Unexpected value for key %v, got %v, expected %v", []byte(key), got, value)
			}
		}
	}
}