core/rawdb, triedb/pathdb: re-structure the trienode history header (#32907)
Some checks are pending
/ Linux Build (push) Waiting to run
/ Linux Build (arm) (push) Waiting to run
/ Keeper Build (push) Waiting to run
/ Windows Build (push) Waiting to run
/ Docker Image (push) Waiting to run

In this PR, several changes have been made:

(a) Restructure the trienode history header section

Previously, the offsets of the key and value sections were recorded before 
encoding data into these sections. As a result, these offsets referred to the
start position of each chunk rather than the end position.

This caused an issue where the end position of the last chunk was
unknown, making it incompatible with the freezer partial-read APIs. 
With this update, all offsets now refer to the end position, and the 
start position of the first chunk is always 0.

(b) Enable partial freezer read for trienode data retrieval

The partial freezer read feature is now used for trienode data
retrieval, so only the required byte range of the key and value
sections is read instead of the entire section.
This commit is contained in:
rjl493456442 2025-10-25 16:16:16 +08:00 committed by GitHub
parent 17e5222997
commit cfa3b96103
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 47 additions and 67 deletions

View file

@ -313,13 +313,13 @@ func ReadTrienodeHistoryHeader(db ethdb.AncientReaderOp, id uint64) ([]byte, err
}
// ReadTrienodeHistoryKeySection retrieves the key section of trienode history.
func ReadTrienodeHistoryKeySection(db ethdb.AncientReaderOp, id uint64) ([]byte, error) {
return db.Ancient(trienodeHistoryKeySectionTable, id-1)
func ReadTrienodeHistoryKeySection(db ethdb.AncientReaderOp, id uint64, offset uint64, length uint64) ([]byte, error) {
return db.AncientBytes(trienodeHistoryKeySectionTable, id-1, offset, length)
}
// ReadTrienodeHistoryValueSection retrieves the value section of trienode history.
func ReadTrienodeHistoryValueSection(db ethdb.AncientReaderOp, id uint64) ([]byte, error) {
return db.Ancient(trienodeHistoryValueSectionTable, id-1)
func ReadTrienodeHistoryValueSection(db ethdb.AncientReaderOp, id uint64, offset uint64, length uint64) ([]byte, error) {
return db.AncientBytes(trienodeHistoryValueSectionTable, id-1, offset, length)
}
// ReadTrienodeHistoryList retrieves a list of trienode history corresponding

View file

@ -22,7 +22,6 @@ import (
"fmt"
"iter"
"maps"
"math"
"slices"
"sort"
"time"
@ -202,17 +201,6 @@ func (h *trienodeHistory) encode() ([]byte, []byte, []byte, error) {
binary.Write(&headerSection, binary.BigEndian, h.meta.block) // 8 byte
for _, owner := range h.owners {
// Fill the header section with offsets at key and value section
headerSection.Write(owner.Bytes()) // 32 bytes
binary.Write(&headerSection, binary.BigEndian, uint32(keySection.Len())) // 4 bytes
// The offset to the value section is theoretically unnecessary, since the
// individual value offset is already tracked in the key section. However,
// we still keep it here for two reasons:
// - It's cheap to store (only 4 bytes for each trie).
// - It can be useful for decoding the trie data when key is not required (e.g., in hash mode).
binary.Write(&headerSection, binary.BigEndian, uint32(valueSection.Len())) // 4 bytes
// Fill the key section with node index
var (
prevKey []byte
@ -266,6 +254,21 @@ func (h *trienodeHistory) encode() ([]byte, []byte, []byte, error) {
if _, err := keySection.Write(trailer); err != nil {
return nil, nil, nil, err
}
// Fill the header section with the offsets of the key and value sections.
// Note that the key/value offsets are intentionally tracked *after* encoding
// them into their respective sections, ensuring each offset refers to the end
// position. For n trie chunks, n offset pairs are sufficient to uniquely locate
// the corresponding data.
headerSection.Write(owner.Bytes()) // 32 bytes
binary.Write(&headerSection, binary.BigEndian, uint32(keySection.Len())) // 4 bytes
// The offset to the value section is theoretically unnecessary, since the
// individual value offset is already tracked in the key section. However,
// we still keep it here for two reasons:
// - It's cheap to store (only 4 bytes for each trie).
// - It can be useful for decoding the trie data when key is not required (e.g., in hash mode).
binary.Write(&headerSection, binary.BigEndian, uint32(valueSection.Len())) // 4 bytes
}
return headerSection.Bytes(), keySection.Bytes(), valueSection.Bytes(), nil
}
@ -475,22 +478,22 @@ func (h *trienodeHistory) decode(header []byte, keySection []byte, valueSection
for i := range len(owners) {
// Resolve the boundary of key section
keyStart := keyOffsets[i]
keyLimit := len(keySection)
if i != len(owners)-1 {
keyLimit = int(keyOffsets[i+1])
var keyStart, keyLimit uint32
if i != 0 {
keyStart = keyOffsets[i-1]
}
if int(keyStart) > len(keySection) || keyLimit > len(keySection) {
keyLimit = keyOffsets[i]
if int(keyStart) > len(keySection) || int(keyLimit) > len(keySection) {
return fmt.Errorf("invalid key offsets: keyStart: %d, keyLimit: %d, size: %d", keyStart, keyLimit, len(keySection))
}
// Resolve the boundary of value section
valStart := valueOffsets[i]
valLimit := len(valueSection)
if i != len(owners)-1 {
valLimit = int(valueOffsets[i+1])
var valStart, valLimit uint32
if i != 0 {
valStart = valueOffsets[i-1]
}
if int(valStart) > len(valueSection) || valLimit > len(valueSection) {
valLimit = valueOffsets[i]
if int(valStart) > len(valueSection) || int(valLimit) > len(valueSection) {
return fmt.Errorf("invalid value offsets: valueStart: %d, valueLimit: %d, size: %d", valStart, valLimit, len(valueSection))
}
@ -510,33 +513,27 @@ type iRange struct {
limit uint32
}
// len reports the size of the range, computed as limit minus start.
// The result is an unsigned 32-bit value, so the subtraction wraps around
// if limit is smaller than start.
// NOTE(review): presumably callers guarantee start <= limit — confirm.
func (ir iRange) len() uint32 {
	size := ir.limit - ir.start
	return size
}
// singleTrienodeHistoryReader provides read access to a single trie within the
// trienode history. It stores an offset to the trie's position in the history,
// along with a set of per-node offsets that can be resolved on demand.
type singleTrienodeHistoryReader struct {
id uint64
reader ethdb.AncientReader
valueRange iRange // value range within the total value section
valueRange iRange // value range within the global value section
valueInternalOffsets map[string]iRange // value offset within the single trie data
}
func newSingleTrienodeHistoryReader(id uint64, reader ethdb.AncientReader, keyRange iRange, valueRange iRange) (*singleTrienodeHistoryReader, error) {
// TODO(rjl493456442) partial freezer read should be supported
keyData, err := rawdb.ReadTrienodeHistoryKeySection(reader, id)
keyData, err := rawdb.ReadTrienodeHistoryKeySection(reader, id, uint64(keyRange.start), uint64(keyRange.len()))
if err != nil {
return nil, err
}
keyStart := int(keyRange.start)
keyLimit := int(keyRange.limit)
if keyRange.limit == math.MaxUint32 {
keyLimit = len(keyData)
}
if len(keyData) < keyStart || len(keyData) < keyLimit {
return nil, fmt.Errorf("key section too short, start: %d, limit: %d, size: %d", keyStart, keyLimit, len(keyData))
}
valueOffsets := make(map[string]iRange)
_, err = decodeSingle(keyData[keyStart:keyLimit], func(key []byte, start int, limit int) error {
_, err = decodeSingle(keyData, func(key []byte, start int, limit int) error {
valueOffsets[string(key)] = iRange{
start: uint32(start),
limit: uint32(limit),
@ -560,20 +557,7 @@ func (sr *singleTrienodeHistoryReader) read(path string) ([]byte, error) {
if !exists {
return nil, fmt.Errorf("trienode %v not found", []byte(path))
}
// TODO(rjl493456442) partial freezer read should be supported
valueData, err := rawdb.ReadTrienodeHistoryValueSection(sr.reader, sr.id)
if err != nil {
return nil, err
}
if len(valueData) < int(sr.valueRange.start) {
return nil, fmt.Errorf("value section too short, start: %d, size: %d", sr.valueRange.start, len(valueData))
}
entryStart := sr.valueRange.start + offset.start
entryLimit := sr.valueRange.start + offset.limit
if len(valueData) < int(entryStart) || len(valueData) < int(entryLimit) {
return nil, fmt.Errorf("value section too short, start: %d, limit: %d, size: %d", entryStart, entryLimit, len(valueData))
}
return valueData[int(entryStart):int(entryLimit)], nil
return rawdb.ReadTrienodeHistoryValueSection(sr.reader, sr.id, uint64(sr.valueRange.start+offset.start), uint64(offset.len()))
}
// trienodeHistoryReader provides read access to node data in the trie node history.
@ -614,27 +598,23 @@ func (r *trienodeHistoryReader) decodeHeader() error {
}
for i, owner := range owners {
// Decode the key range for this trie chunk
var keyLimit uint32
if i == len(owners)-1 {
keyLimit = math.MaxUint32
} else {
keyLimit = keyOffsets[i+1]
var keyStart uint32
if i != 0 {
keyStart = keyOffsets[i-1]
}
r.keyRanges[owner] = iRange{
start: keyOffsets[i],
limit: keyLimit,
start: keyStart,
limit: keyOffsets[i],
}
// Decode the value range for this trie chunk
var valLimit uint32
if i == len(owners)-1 {
valLimit = math.MaxUint32
} else {
valLimit = valOffsets[i+1]
var valStart uint32
if i != 0 {
valStart = valOffsets[i-1]
}
r.valRanges[owner] = iRange{
start: valOffsets[i],
limit: valLimit,
start: valStart,
limit: valOffsets[i],
}
}
return nil