From cfa3b96103f515dc6bc280d78ab3d4830e4ca8c7 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Sat, 25 Oct 2025 16:16:16 +0800 Subject: [PATCH] core/rawdb, triedb/pathdb: re-structure the trienode history header (#32907) In this PR, several changes have been made: (a) Restructure the trienode history header section. Previously, the offsets of the key and value sections were recorded before encoding data into these sections. As a result, these offsets referred to the start position of each chunk rather than the end position. This caused an issue where the end position of the last chunk was unknown, making it incompatible with the freezer partial-read APIs. With this update, all offsets now refer to the end position, and the start position of the first chunk is always 0. (b) Enable partial freezer read for trienode data retrieval. The partial freezer read feature is now utilized in trienode data retrieval, improving efficiency. --- core/rawdb/accessors_state.go | 8 +-- triedb/pathdb/history_trienode.go | 106 ++++++++++++------------------ 2 files changed, 47 insertions(+), 67 deletions(-) diff --git a/core/rawdb/accessors_state.go b/core/rawdb/accessors_state.go index 714c1f77d6..b97c7a07a1 100644 --- a/core/rawdb/accessors_state.go +++ b/core/rawdb/accessors_state.go @@ -313,13 +313,13 @@ func ReadTrienodeHistoryHeader(db ethdb.AncientReaderOp, id uint64) ([]byte, err } // ReadTrienodeHistoryKeySection retrieves the key section of trienode history. -func ReadTrienodeHistoryKeySection(db ethdb.AncientReaderOp, id uint64) ([]byte, error) { - return db.Ancient(trienodeHistoryKeySectionTable, id-1) +func ReadTrienodeHistoryKeySection(db ethdb.AncientReaderOp, id uint64, offset uint64, length uint64) ([]byte, error) { + return db.AncientBytes(trienodeHistoryKeySectionTable, id-1, offset, length) } // ReadTrienodeHistoryValueSection retrieves the value section of trienode history. 
-func ReadTrienodeHistoryValueSection(db ethdb.AncientReaderOp, id uint64) ([]byte, error) { - return db.Ancient(trienodeHistoryValueSectionTable, id-1) +func ReadTrienodeHistoryValueSection(db ethdb.AncientReaderOp, id uint64, offset uint64, length uint64) ([]byte, error) { + return db.AncientBytes(trienodeHistoryValueSectionTable, id-1, offset, length) } // ReadTrienodeHistoryList retrieves the a list of trienode history corresponding diff --git a/triedb/pathdb/history_trienode.go b/triedb/pathdb/history_trienode.go index 2f31238612..3f45b41117 100644 --- a/triedb/pathdb/history_trienode.go +++ b/triedb/pathdb/history_trienode.go @@ -22,7 +22,6 @@ import ( "fmt" "iter" "maps" - "math" "slices" "sort" "time" @@ -202,17 +201,6 @@ func (h *trienodeHistory) encode() ([]byte, []byte, []byte, error) { binary.Write(&headerSection, binary.BigEndian, h.meta.block) // 8 byte for _, owner := range h.owners { - // Fill the header section with offsets at key and value section - headerSection.Write(owner.Bytes()) // 32 bytes - binary.Write(&headerSection, binary.BigEndian, uint32(keySection.Len())) // 4 bytes - - // The offset to the value section is theoretically unnecessary, since the - // individual value offset is already tracked in the key section. However, - // we still keep it here for two reasons: - // - It's cheap to store (only 4 bytes for each trie). - // - It can be useful for decoding the trie data when key is not required (e.g., in hash mode). - binary.Write(&headerSection, binary.BigEndian, uint32(valueSection.Len())) // 4 bytes - // Fill the key section with node index var ( prevKey []byte @@ -266,6 +254,21 @@ func (h *trienodeHistory) encode() ([]byte, []byte, []byte, error) { if _, err := keySection.Write(trailer); err != nil { return nil, nil, nil, err } + + // Fill the header section with the offsets of the key and value sections. 
+ // Note that the key/value offsets are intentionally tracked *after* encoding + // them into their respective sections, ensuring each offset refers to the end + // position. For n trie chunks, n offset pairs are sufficient to uniquely locate + // the corresponding data. + headerSection.Write(owner.Bytes()) // 32 bytes + binary.Write(&headerSection, binary.BigEndian, uint32(keySection.Len())) // 4 bytes + + // The offset to the value section is theoretically unnecessary, since the + // individual value offset is already tracked in the key section. However, + // we still keep it here for two reasons: + // - It's cheap to store (only 4 bytes for each trie). + // - It can be useful for decoding the trie data when key is not required (e.g., in hash mode). + binary.Write(&headerSection, binary.BigEndian, uint32(valueSection.Len())) // 4 bytes } return headerSection.Bytes(), keySection.Bytes(), valueSection.Bytes(), nil } @@ -475,22 +478,22 @@ func (h *trienodeHistory) decode(header []byte, keySection []byte, valueSection for i := range len(owners) { // Resolve the boundary of key section - keyStart := keyOffsets[i] - keyLimit := len(keySection) - if i != len(owners)-1 { - keyLimit = int(keyOffsets[i+1]) + var keyStart, keyLimit uint32 + if i != 0 { + keyStart = keyOffsets[i-1] } - if int(keyStart) > len(keySection) || keyLimit > len(keySection) { + keyLimit = keyOffsets[i] + if int(keyStart) > len(keySection) || int(keyLimit) > len(keySection) { return fmt.Errorf("invalid key offsets: keyStart: %d, keyLimit: %d, size: %d", keyStart, keyLimit, len(keySection)) } // Resolve the boundary of value section - valStart := valueOffsets[i] - valLimit := len(valueSection) - if i != len(owners)-1 { - valLimit = int(valueOffsets[i+1]) + var valStart, valLimit uint32 + if i != 0 { + valStart = valueOffsets[i-1] } - if int(valStart) > len(valueSection) || valLimit > len(valueSection) { + valLimit = valueOffsets[i] + if int(valStart) > len(valueSection) || int(valLimit) > 
len(valueSection) { return fmt.Errorf("invalid value offsets: valueStart: %d, valueLimit: %d, size: %d", valStart, valLimit, len(valueSection)) } @@ -510,33 +513,27 @@ type iRange struct { limit uint32 } +func (ir iRange) len() uint32 { + return ir.limit - ir.start +} + // singleTrienodeHistoryReader provides read access to a single trie within the // trienode history. It stores an offset to the trie's position in the history, // along with a set of per-node offsets that can be resolved on demand. type singleTrienodeHistoryReader struct { id uint64 reader ethdb.AncientReader - valueRange iRange // value range within the total value section + valueRange iRange // value range within the global value section valueInternalOffsets map[string]iRange // value offset within the single trie data } func newSingleTrienodeHistoryReader(id uint64, reader ethdb.AncientReader, keyRange iRange, valueRange iRange) (*singleTrienodeHistoryReader, error) { - // TODO(rjl493456442) partial freezer read should be supported - keyData, err := rawdb.ReadTrienodeHistoryKeySection(reader, id) + keyData, err := rawdb.ReadTrienodeHistoryKeySection(reader, id, uint64(keyRange.start), uint64(keyRange.len())) if err != nil { return nil, err } - keyStart := int(keyRange.start) - keyLimit := int(keyRange.limit) - if keyRange.limit == math.MaxUint32 { - keyLimit = len(keyData) - } - if len(keyData) < keyStart || len(keyData) < keyLimit { - return nil, fmt.Errorf("key section too short, start: %d, limit: %d, size: %d", keyStart, keyLimit, len(keyData)) - } - valueOffsets := make(map[string]iRange) - _, err = decodeSingle(keyData[keyStart:keyLimit], func(key []byte, start int, limit int) error { + _, err = decodeSingle(keyData, func(key []byte, start int, limit int) error { valueOffsets[string(key)] = iRange{ start: uint32(start), limit: uint32(limit), @@ -560,20 +557,7 @@ func (sr *singleTrienodeHistoryReader) read(path string) ([]byte, error) { if !exists { return nil, fmt.Errorf("trienode %v not 
found", []byte(path)) } - // TODO(rjl493456442) partial freezer read should be supported - valueData, err := rawdb.ReadTrienodeHistoryValueSection(sr.reader, sr.id) - if err != nil { - return nil, err - } - if len(valueData) < int(sr.valueRange.start) { - return nil, fmt.Errorf("value section too short, start: %d, size: %d", sr.valueRange.start, len(valueData)) - } - entryStart := sr.valueRange.start + offset.start - entryLimit := sr.valueRange.start + offset.limit - if len(valueData) < int(entryStart) || len(valueData) < int(entryLimit) { - return nil, fmt.Errorf("value section too short, start: %d, limit: %d, size: %d", entryStart, entryLimit, len(valueData)) - } - return valueData[int(entryStart):int(entryLimit)], nil + return rawdb.ReadTrienodeHistoryValueSection(sr.reader, sr.id, uint64(sr.valueRange.start+offset.start), uint64(offset.len())) } // trienodeHistoryReader provides read access to node data in the trie node history. @@ -614,27 +598,23 @@ func (r *trienodeHistoryReader) decodeHeader() error { } for i, owner := range owners { // Decode the key range for this trie chunk - var keyLimit uint32 - if i == len(owners)-1 { - keyLimit = math.MaxUint32 - } else { - keyLimit = keyOffsets[i+1] + var keyStart uint32 + if i != 0 { + keyStart = keyOffsets[i-1] } r.keyRanges[owner] = iRange{ - start: keyOffsets[i], - limit: keyLimit, + start: keyStart, + limit: keyOffsets[i], } // Decode the value range for this trie chunk - var valLimit uint32 - if i == len(owners)-1 { - valLimit = math.MaxUint32 - } else { - valLimit = valOffsets[i+1] + var valStart uint32 + if i != 0 { + valStart = valOffsets[i-1] } r.valRanges[owner] = iRange{ - start: valOffsets[i], - limit: valLimit, + start: valStart, + limit: valOffsets[i], } } return nil