mirror of
https://github.com/ethereum/go-ethereum.git
synced 2026-04-05 01:25:55 +00:00
It's a PR based on #33303 and introduces an approach for trienode history indexing. --- In the current archive node design, resolving a historical trie node at a specific block involves the following steps: - Look up the corresponding trie node index and locate the first entry whose state ID is greater than the target state ID. - Resolve the trie node from the associated trienode history object. A naive approach would be to store mutation records for every trie node, similar to how flat state mutations are recorded. However, the total number of trie nodes is extremely large (approximately 2.4 billion), and the vast majority of them are rarely modified. Creating an index entry for each individual trie node would be very wasteful in both storage and indexing overhead. To address this, we aggregate multiple trie nodes into chunks and index mutations at the chunk level instead. --- For a storage trie, the trie is vertically partitioned into multiple sub tries, each spanning three consecutive levels. The top three levels (1 + 16 + 256 nodes) form the first chunk, and every subsequent three-level segment forms another chunk. ``` Original trie structure Level 0 [ ROOT ] 1 node Level 1 [0] [1] [2] ... [f] 16 nodes Level 2 [00] [01] ... [0f] [10] ... [ff] 256 nodes Level 3 [000] [001] ... [00f] [010] ... [fff] 4096 nodes Level 4 [0000] ... [000f] [0010] ... [001f] ... [ffff] 65536 nodes Vertical split into chunks (3 levels per chunk) Level0 [ ROOT ] 1 chunk Level3 [000] ... [fff] 4096 chunks Level6 [000000] ... [fffffff] 16777216 chunks ``` Within each chunk, there are 273 nodes in total, regardless of the chunk's depth in the trie. ``` Level 0 [ 0 ] 1 node Level 1 [ 1 ] … [ 16 ] 16 nodes Level 2 [ 17 ] … … [ 272 ] 256 nodes ``` Each chunk is uniquely identified by the path prefix of the root node of its corresponding sub-trie. Within a chunk, nodes are identified by a numeric index ranging from 0 to 272. 
For example, suppose that at block 100, the nodes with paths `[]`, `[0]`, `[f]`, `[00]`, and `[ff]` are modified. The mutation record for chunk 0 is then appended with the following entry: `[100 → [0, 1, 16, 17, 272]]`, `272` is the numeric ID of path `[ff]`. Furthermore, due to the structural properties of the Merkle Patricia Trie, if a child node is modified, all of its ancestors along the same path must also be updated. As a result, in the above example, recording mutations for nodes `00` and `ff` alone is sufficient, as this implicitly indicates that their ancestor nodes `[]`, `[0]` and `[f]` were also modified at block 100. --- Query processing is slightly more complicated. Since trie nodes are indexed at the chunk level, each individual trie node lookup requires an additional filtering step to ensure that a given mutation record actually corresponds to the target trie node. As mentioned earlier, mutation records store only the numeric identifiers of leaf nodes, while ancestor nodes are omitted for storage efficiency. Consequently, when querying an ancestor node, additional checks are required to determine whether the mutation record implicitly represents a modification to that ancestor. Moreover, since trie nodes are indexed at the chunk level, some trie nodes may be updated frequently, causing their mutation records to dominate the index. Queries targeting rarely modified trie nodes would then scan a large amount of irrelevant index data, significantly degrading performance. To address this issue, a bitmap is introduced for each index block and stored in the chunk's metadata. Before loading a specific index block, the bitmap is checked to determine whether the block contains mutation records relevant to the target trie node. If the bitmap indicates that the block does not contain such records, the block is skipped entirely.
719 lines
26 KiB
Go
719 lines
26 KiB
Go
// Copyright 2025 The go-ethereum Authors
|
|
// This file is part of the go-ethereum library.
|
|
//
|
|
// The go-ethereum library is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Lesser General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// The go-ethereum library is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Lesser General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
package pathdb
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/binary"
|
|
"errors"
|
|
"fmt"
|
|
"iter"
|
|
"maps"
|
|
"math"
|
|
"slices"
|
|
"sort"
|
|
"time"
|
|
|
|
"github.com/ethereum/go-ethereum/common"
|
|
"github.com/ethereum/go-ethereum/core/rawdb"
|
|
"github.com/ethereum/go-ethereum/ethdb"
|
|
"github.com/ethereum/go-ethereum/log"
|
|
)
|
|
|
|
// Each trie node history entry consists of three parts (stored in three freezer
|
|
// tables accordingly):
|
|
//
|
|
// # Header
|
|
// The header records metadata, including:
|
|
//
|
|
// - the history version (1 byte)
|
|
// - the parent state root (32 bytes)
|
|
// - the current state root (32 bytes)
|
|
// - block number (8 bytes)
|
|
//
|
|
// - a lexicographically sorted list of trie IDs
|
|
// - the corresponding offsets into the key and value sections for each trie data chunk
|
|
//
|
|
// Although some fields (e.g., parent state root, block number) are duplicated
|
|
// between the state history and the trienode history, these two histories
|
|
// operate independently. To ensure each remains self-contained and self-descriptive,
|
|
// we have chosen to maintain these duplicate fields.
|
|
//
|
|
// # Key section
|
|
// The key section stores trie node keys (paths) in a compressed format.
|
|
// It also contains relative offsets into the value section for resolving
|
|
// the corresponding trie node data. Note that these offsets are relative
|
|
// to the data chunk for the trie; the chunk offset must be added to obtain
|
|
// the absolute position.
|
|
//
|
|
// # Value section
|
|
// The value section is a concatenated byte stream of all trie node data.
|
|
// Each trie node can be retrieved using the offset and length specified
|
|
// by its index entry.
|
|
//
|
|
// The header and key sections are sufficient for locating a trie node,
|
|
// while a partial read of the value section is enough to retrieve its data.
|
|
|
|
// Header section:
|
|
//
|
|
// +----------+------------------+---------------------+---------------------+-------+------------------+---------------------+---------------------|
|
|
// | metadata | TrieID(32 bytes) | key offset(4 bytes) | val offset(4 bytes) | ... | TrieID(32 bytes) | key offset(4 bytes) | val offset(4 bytes) |
|
|
// +----------+------------------+---------------------+---------------------+-------+------------------+---------------------+---------------------|
|
|
//
|
|
//
|
|
// Key section:
|
|
//
|
|
// + restart point + restart point (depends on restart interval)
|
|
// / /
|
|
// +---------------+---------------+---------------+---------------+---------+
|
|
// | node entry 1 | node entry 2 | ... | node entry n | trailer |
|
|
// +---------------+---------------+---------------+---------------+---------+
|
|
// \ /
|
|
// +---- restart block ------+
|
|
//
|
|
// node entry:
|
|
//
|
|
// +---- key len ----+
|
|
// / \
|
|
// +-------+---------+-----------+---------+-----------------------+-----------------+
|
|
// | shared (varint) | not shared (varint) | value length (varlen) | key (varlen) |
|
|
// +-----------------+---------------------+-----------------------+-----------------+
|
|
//
|
|
// trailer:
|
|
//
|
|
// +---- 4-bytes ----+ +---- 4-bytes ----+
|
|
// / \ / \
|
|
// +----------------------+------------------------+-----+--------------------------+
|
|
// | restart_1 key offset | restart_1 value offset | ... | restart number (4-bytes) |
|
|
// +----------------------+------------------------+-----+--------------------------+
|
|
//
|
|
// Note: Both the key offset and the value offset are relative to the start of
|
|
// the trie data chunk. To obtain the absolute offset, add the offset of the
|
|
// trie data chunk itself.
|
|
//
|
|
// Value section:
|
|
//
|
|
// +--------------+--------------+-------+---------------+
|
|
// | node data 1 | node data 2 | ... | node data n |
|
|
// +--------------+--------------+-------+---------------+
|
|
//
|
|
// NOTE: All fixed-length integers are big-endian.
|
|
|
|
const (
	trienodeHistoryV0 = uint8(0) // initial version of node history structure

	trienodeHistoryVersion = trienodeHistoryV0 // the default node history version

	// trienodeMetadataSize is the size of the fixed metadata prefix in the
	// header section: version (1) + parent root (32) + state root (32) +
	// block number (8).
	trienodeMetadataSize = 1 + 2*common.HashLength + 8 // the size of metadata in the history

	// trienodeTrieHeaderSize is the per-trie entry size in the header
	// section: trie ID (32) + key offset (4) + value offset (4).
	trienodeTrieHeaderSize = 8 + common.HashLength // the size of a single trie header in history

	// trienodeDataBlockRestartLen is the number of node entries between two
	// restart points in the key section; keys are prefix-compressed only
	// within a restart block.
	trienodeDataBlockRestartLen = 16 // The restart interval length of trie node block
)
|
|
|
|
// trienodeMetadata describes the meta data of trienode history. It mirrors
// the fixed-size prefix of the header section (see trienodeMetadataSize).
type trienodeMetadata struct {
	version uint8       // version tag of history object
	parent  common.Hash // prev-state root before the state transition
	root    common.Hash // post-state root after the state transition
	block   uint64      // associated block number
}
|
|
|
|
// trienodeHistory represents a set of trie node changes resulting from a state
// transition across the main account trie and all associated storage tries.
//
// The owners slice and the per-owner path lists are kept sorted so that the
// encoded form is deterministic and binary-searchable.
type trienodeHistory struct {
	meta     *trienodeMetadata                 // Metadata of the history
	owners   []common.Hash                     // List of trie identifier sorted lexicographically
	nodeList map[common.Hash][]string          // Set of node paths sorted lexicographically
	nodes    map[common.Hash]map[string][]byte // Set of original value of trie nodes before state transition
}
|
|
|
|
// newTrienodeHistory constructs a trienode history with the provided trie nodes.
|
|
func newTrienodeHistory(root common.Hash, parent common.Hash, block uint64, nodes map[common.Hash]map[string][]byte) *trienodeHistory {
|
|
nodeList := make(map[common.Hash][]string)
|
|
for owner, subset := range nodes {
|
|
keys := sort.StringSlice(slices.Collect(maps.Keys(subset)))
|
|
keys.Sort()
|
|
nodeList[owner] = keys
|
|
}
|
|
return &trienodeHistory{
|
|
meta: &trienodeMetadata{
|
|
version: trienodeHistoryVersion,
|
|
parent: parent,
|
|
root: root,
|
|
block: block,
|
|
},
|
|
owners: slices.SortedFunc(maps.Keys(nodes), common.Hash.Cmp),
|
|
nodeList: nodeList,
|
|
nodes: nodes,
|
|
}
|
|
}
|
|
|
|
// typ implements the history interface, returning the historical data type held.
|
|
func (h *trienodeHistory) typ() historyType {
|
|
return typeTrienodeHistory
|
|
}
|
|
|
|
// forEach implements the history interface, returning an iterator to traverse the
|
|
// state entries in the history.
|
|
func (h *trienodeHistory) forEach() iter.Seq[stateIdent] {
|
|
return func(yield func(stateIdent) bool) {
|
|
for _, owner := range h.owners {
|
|
for _, path := range h.nodeList[owner] {
|
|
if !yield(newTrienodeIdent(owner, path)) {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// encode serializes the contained trie nodes into bytes.
//
// It returns the header, key and value sections respectively (matching the
// three freezer tables described at the top of this file). Within each trie
// chunk, keys are prefix-compressed against their predecessor and restart
// points are emitted every trienodeDataBlockRestartLen entries; a trailer of
// (key offset, value offset) pairs plus the restart count terminates each
// chunk's key data. Offsets recorded in the header refer to the *end* of
// each chunk, so n offset pairs suffice to locate n chunks.
//
// Note: errors from binary.Write/bytes.Buffer writes to in-memory buffers
// cannot occur in practice; write errors are still propagated where checked.
func (h *trienodeHistory) encode() ([]byte, []byte, []byte, error) {
	var (
		buf = make([]byte, 64) // scratch space, large enough for three varints
		headerSection bytes.Buffer
		keySection bytes.Buffer
		valueSection bytes.Buffer
	)
	binary.Write(&headerSection, binary.BigEndian, h.meta.version) // 1 byte
	headerSection.Write(h.meta.parent.Bytes()) // 32 bytes
	headerSection.Write(h.meta.root.Bytes()) // 32 bytes
	binary.Write(&headerSection, binary.BigEndian, h.meta.block) // 8 byte

	for _, owner := range h.owners {
		// Fill the key section with node index
		var (
			prevKey []byte
			restarts []uint32
			prefixLen int

			internalKeyOffset uint32 // key offset for the trie internally
			internalValOffset uint32 // value offset for the trie internally
		)
		for i, path := range h.nodeList[owner] {
			key := []byte(path)
			// At every restart point the key is stored in full (no shared
			// prefix), so decoders can seek to a restart without history.
			if i%trienodeDataBlockRestartLen == 0 {
				restarts = append(restarts, internalKeyOffset)
				restarts = append(restarts, internalValOffset)
				prefixLen = 0
			} else {
				prefixLen = commonPrefixLen(prevKey, key)
			}
			value := h.nodes[owner][path]

			// key section
			n := binary.PutUvarint(buf[0:], uint64(prefixLen)) // key length shared (varint)
			n += binary.PutUvarint(buf[n:], uint64(len(key)-prefixLen)) // key length not shared (varint)
			n += binary.PutUvarint(buf[n:], uint64(len(value))) // value length (varint)

			if _, err := keySection.Write(buf[:n]); err != nil {
				return nil, nil, nil, err
			}
			// unshared key
			if _, err := keySection.Write(key[prefixLen:]); err != nil {
				return nil, nil, nil, err
			}
			n += len(key) - prefixLen
			prevKey = key

			// value section
			if _, err := valueSection.Write(value); err != nil {
				return nil, nil, nil, err
			}
			internalKeyOffset += uint32(n)
			internalValOffset += uint32(len(value))
		}

		// Encode trailer, the number of restart sections is len(restarts))/2,
		// as we track the offsets of both key and value sections.
		var trailer []byte
		for _, number := range append(restarts, uint32(len(restarts))/2) {
			binary.BigEndian.PutUint32(buf[:4], number)
			trailer = append(trailer, buf[:4]...)
		}
		if _, err := keySection.Write(trailer); err != nil {
			return nil, nil, nil, err
		}

		// Fill the header section with the offsets of the key and value sections.
		// Note that the key/value offsets are intentionally tracked *after* encoding
		// them into their respective sections, ensuring each offset refers to the end
		// position. For n trie chunks, n offset pairs are sufficient to uniquely locate
		// the corresponding data.
		headerSection.Write(owner.Bytes()) // 32 bytes
		binary.Write(&headerSection, binary.BigEndian, uint32(keySection.Len())) // 4 bytes

		// The offset to the value section is theoretically unnecessary, since the
		// individual value offset is already tracked in the key section. However,
		// we still keep it here for two reasons:
		// - It's cheap to store (only 4 bytes for each trie).
		// - It can be useful for decoding the trie data when key is not required (e.g., in hash mode).
		binary.Write(&headerSection, binary.BigEndian, uint32(valueSection.Len())) // 4 bytes
	}
	return headerSection.Bytes(), keySection.Bytes(), valueSection.Bytes(), nil
}
|
|
|
|
// decodeHeader resolves the metadata from the header section. An error
|
|
// should be returned if the header section is corrupted.
|
|
func decodeHeader(data []byte) (*trienodeMetadata, []common.Hash, []uint32, []uint32, error) {
|
|
if len(data) < trienodeMetadataSize {
|
|
return nil, nil, nil, nil, fmt.Errorf("trienode history is too small, index size: %d", len(data))
|
|
}
|
|
version := data[0]
|
|
if version != trienodeHistoryVersion {
|
|
return nil, nil, nil, nil, fmt.Errorf("unregonized trienode history version: %d", version)
|
|
}
|
|
parent := common.BytesToHash(data[1 : common.HashLength+1]) // 32 bytes
|
|
root := common.BytesToHash(data[common.HashLength+1 : common.HashLength*2+1]) // 32 bytes
|
|
block := binary.BigEndian.Uint64(data[common.HashLength*2+1 : trienodeMetadataSize]) // 8 bytes
|
|
|
|
size := len(data) - trienodeMetadataSize
|
|
if size%trienodeTrieHeaderSize != 0 {
|
|
return nil, nil, nil, nil, fmt.Errorf("truncated trienode history data, size %d", len(data))
|
|
}
|
|
count := size / trienodeTrieHeaderSize
|
|
|
|
var (
|
|
owners = make([]common.Hash, 0, count)
|
|
keyOffsets = make([]uint32, 0, count)
|
|
valOffsets = make([]uint32, 0, count)
|
|
)
|
|
for i := range count {
|
|
n := trienodeMetadataSize + trienodeTrieHeaderSize*i
|
|
owner := common.BytesToHash(data[n : n+common.HashLength])
|
|
if i != 0 && bytes.Compare(owner.Bytes(), owners[i-1].Bytes()) <= 0 {
|
|
return nil, nil, nil, nil, fmt.Errorf("trienode owners are out of order, prev: %v, cur: %v", owners[i-1], owner)
|
|
}
|
|
owners = append(owners, owner)
|
|
|
|
// Decode the offset to the key section
|
|
keyOffset := binary.BigEndian.Uint32(data[n+common.HashLength : n+common.HashLength+4])
|
|
if i != 0 && keyOffset <= keyOffsets[i-1] {
|
|
return nil, nil, nil, nil, fmt.Errorf("key offset is out of order, prev: %v, cur: %v", keyOffsets[i-1], keyOffset)
|
|
}
|
|
keyOffsets = append(keyOffsets, keyOffset)
|
|
|
|
// Decode the offset into the value section. Note that identical value offsets
|
|
// are valid if the node values in the last trie chunk are all zero (e.g., after
|
|
// a trie deletion).
|
|
valOffset := binary.BigEndian.Uint32(data[n+common.HashLength+4 : n+common.HashLength+8])
|
|
if i != 0 && valOffset < valOffsets[i-1] {
|
|
return nil, nil, nil, nil, fmt.Errorf("value offset is out of order, prev: %v, cur: %v", valOffsets[i-1], valOffset)
|
|
}
|
|
valOffsets = append(valOffsets, valOffset)
|
|
}
|
|
return &trienodeMetadata{
|
|
version: version,
|
|
parent: parent,
|
|
root: root,
|
|
block: block,
|
|
}, owners, keyOffsets, valOffsets, nil
|
|
}
|
|
|
|
func decodeSingle(keySection []byte, onValue func([]byte, int, int) error) ([]string, error) {
|
|
var (
|
|
prevKey []byte
|
|
items int
|
|
keyOffsets []uint32
|
|
valOffsets []uint32
|
|
|
|
keyOff int // the key offset within the single trie data
|
|
valOff int // the value offset within the single trie data
|
|
|
|
keys []string
|
|
)
|
|
// Decode the number of restart section
|
|
if len(keySection) < 4 {
|
|
return nil, fmt.Errorf("key section too short, size: %d", len(keySection))
|
|
}
|
|
nRestarts := binary.BigEndian.Uint32(keySection[len(keySection)-4:])
|
|
|
|
if len(keySection) < int(8*nRestarts)+4 {
|
|
return nil, fmt.Errorf("key section too short, restarts: %d, size: %d", nRestarts, len(keySection))
|
|
}
|
|
for i := range int(nRestarts) {
|
|
o := len(keySection) - 4 - (int(nRestarts)-i)*8
|
|
keyOffset := binary.BigEndian.Uint32(keySection[o : o+4])
|
|
if i != 0 && keyOffset <= keyOffsets[i-1] {
|
|
return nil, fmt.Errorf("key offset is out of order, prev: %v, cur: %v", keyOffsets[i-1], keyOffset)
|
|
}
|
|
keyOffsets = append(keyOffsets, keyOffset)
|
|
|
|
// Same value offset is allowed just in case all the trie nodes in the last
|
|
// section have zero-size value.
|
|
valOffset := binary.BigEndian.Uint32(keySection[o+4 : o+8])
|
|
if i != 0 && valOffset < valOffsets[i-1] {
|
|
return nil, fmt.Errorf("value offset is out of order, prev: %v, cur: %v", valOffsets[i-1], valOffset)
|
|
}
|
|
valOffsets = append(valOffsets, valOffset)
|
|
}
|
|
keyLimit := len(keySection) - 4 - int(nRestarts)*8
|
|
|
|
// Decode data
|
|
for keyOff < keyLimit {
|
|
// Validate the key and value offsets within the single trie data chunk
|
|
if items%trienodeDataBlockRestartLen == 0 {
|
|
restartIndex := items / trienodeDataBlockRestartLen
|
|
if restartIndex >= len(keyOffsets) {
|
|
return nil, fmt.Errorf("restart index out of range: %d, available restarts: %d", restartIndex, len(keyOffsets))
|
|
}
|
|
if keyOff != int(keyOffsets[restartIndex]) {
|
|
return nil, fmt.Errorf("key offset is not matched, recorded: %d, want: %d", keyOffsets[restartIndex], keyOff)
|
|
}
|
|
if valOff != int(valOffsets[restartIndex]) {
|
|
return nil, fmt.Errorf("value offset is not matched, recorded: %d, want: %d", valOffsets[restartIndex], valOff)
|
|
}
|
|
}
|
|
// Resolve the entry from key section
|
|
nShared, nn := binary.Uvarint(keySection[keyOff:]) // key length shared (varint)
|
|
if nn <= 0 {
|
|
return nil, fmt.Errorf("corrupted varint encoding for nShared at offset %d", keyOff)
|
|
}
|
|
keyOff += nn
|
|
nUnshared, nn := binary.Uvarint(keySection[keyOff:]) // key length not shared (varint)
|
|
if nn <= 0 {
|
|
return nil, fmt.Errorf("corrupted varint encoding for nUnshared at offset %d", keyOff)
|
|
}
|
|
keyOff += nn
|
|
nValue, nn := binary.Uvarint(keySection[keyOff:]) // value length (varint)
|
|
if nn <= 0 {
|
|
return nil, fmt.Errorf("corrupted varint encoding for nValue at offset %d", keyOff)
|
|
}
|
|
keyOff += nn
|
|
|
|
// Validate that the values can fit in an int to prevent overflow on 32-bit systems
|
|
if nShared > uint64(math.MaxUint32) || nUnshared > uint64(math.MaxUint32) || nValue > uint64(math.MaxUint32) {
|
|
return nil, errors.New("key size too large")
|
|
}
|
|
|
|
// Resolve unshared key
|
|
if keyOff+int(nUnshared) > len(keySection) {
|
|
return nil, fmt.Errorf("key length too long, unshared key length: %d, off: %d, section size: %d", nUnshared, keyOff, len(keySection))
|
|
}
|
|
unsharedKey := keySection[keyOff : keyOff+int(nUnshared)]
|
|
keyOff += int(nUnshared)
|
|
|
|
// Assemble the full key
|
|
var key []byte
|
|
if items%trienodeDataBlockRestartLen == 0 {
|
|
if nShared != 0 {
|
|
return nil, fmt.Errorf("unexpected non-zero shared key prefix: %d", nShared)
|
|
}
|
|
key = unsharedKey
|
|
} else {
|
|
if int(nShared) > len(prevKey) {
|
|
return nil, fmt.Errorf("unexpected shared key prefix: %d, prefix key length: %d", nShared, len(prevKey))
|
|
}
|
|
key = append([]byte{}, prevKey[:nShared]...)
|
|
key = append(key, unsharedKey...)
|
|
}
|
|
if items != 0 && bytes.Compare(prevKey, key) >= 0 {
|
|
return nil, fmt.Errorf("trienode paths are out of order, prev: %v, cur: %v", prevKey, key)
|
|
}
|
|
prevKey = key
|
|
|
|
// Resolve value
|
|
if onValue != nil {
|
|
if err := onValue(key, valOff, valOff+int(nValue)); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
valOff += int(nValue)
|
|
|
|
items++
|
|
keys = append(keys, string(key))
|
|
}
|
|
if keyOff != keyLimit {
|
|
return nil, fmt.Errorf("excessive key data after decoding, offset: %d, size: %d", keyOff, keyLimit)
|
|
}
|
|
return keys, nil
|
|
}
|
|
|
|
func decodeSingleWithValue(keySection []byte, valueSection []byte) ([]string, map[string][]byte, error) {
|
|
var (
|
|
offset int
|
|
nodes = make(map[string][]byte)
|
|
)
|
|
paths, err := decodeSingle(keySection, func(key []byte, start int, limit int) error {
|
|
if start != offset {
|
|
return fmt.Errorf("gapped value section offset: %d, want: %d", start, offset)
|
|
}
|
|
// start == limit is allowed for zero-value trie node (e.g., non-existent node)
|
|
if start > limit {
|
|
return fmt.Errorf("invalid value offsets, start: %d, limit: %d", start, limit)
|
|
}
|
|
if start > len(valueSection) || limit > len(valueSection) {
|
|
return fmt.Errorf("value section out of range: start: %d, limit: %d, size: %d", start, limit, len(valueSection))
|
|
}
|
|
nodes[string(key)] = valueSection[start:limit]
|
|
|
|
offset = limit
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
if offset != len(valueSection) {
|
|
return nil, nil, fmt.Errorf("excessive value data after decoding, offset: %d, size: %d", offset, len(valueSection))
|
|
}
|
|
return paths, nodes, nil
|
|
}
|
|
|
|
// decode deserializes the contained trie nodes from the provided bytes.
|
|
func (h *trienodeHistory) decode(header []byte, keySection []byte, valueSection []byte) error {
|
|
metadata, owners, keyOffsets, valueOffsets, err := decodeHeader(header)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
h.meta = metadata
|
|
h.owners = owners
|
|
h.nodeList = make(map[common.Hash][]string)
|
|
h.nodes = make(map[common.Hash]map[string][]byte)
|
|
|
|
for i := range len(owners) {
|
|
// Resolve the boundary of key section
|
|
var keyStart, keyLimit uint32
|
|
if i != 0 {
|
|
keyStart = keyOffsets[i-1]
|
|
}
|
|
keyLimit = keyOffsets[i]
|
|
if int(keyStart) > len(keySection) || int(keyLimit) > len(keySection) {
|
|
return fmt.Errorf("invalid key offsets: keyStart: %d, keyLimit: %d, size: %d", keyStart, keyLimit, len(keySection))
|
|
}
|
|
|
|
// Resolve the boundary of value section
|
|
var valStart, valLimit uint32
|
|
if i != 0 {
|
|
valStart = valueOffsets[i-1]
|
|
}
|
|
valLimit = valueOffsets[i]
|
|
if int(valStart) > len(valueSection) || int(valLimit) > len(valueSection) {
|
|
return fmt.Errorf("invalid value offsets: valueStart: %d, valueLimit: %d, size: %d", valStart, valLimit, len(valueSection))
|
|
}
|
|
|
|
// Decode the key and values for this specific trie
|
|
paths, nodes, err := decodeSingleWithValue(keySection[keyStart:keyLimit], valueSection[valStart:valLimit])
|
|
if err != nil {
|
|
return err
|
|
}
|
|
h.nodeList[owners[i]] = paths
|
|
h.nodes[owners[i]] = nodes
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// iRange represents a half-open interval [start, limit) of byte offsets
// within a section.
type iRange struct {
	start uint32 // inclusive lower bound
	limit uint32 // exclusive upper bound
}

// len returns the number of bytes covered by the range.
func (ir iRange) len() uint32 {
	return ir.limit - ir.start
}
|
|
|
|
// singleTrienodeHistoryReader provides read access to a single trie within the
// trienode history. It stores an offset to the trie's position in the history,
// along with a set of per-node offsets that can be resolved on demand.
type singleTrienodeHistoryReader struct {
	id                   uint64              // ID of the associated trienode history
	reader               ethdb.AncientReader // Database reader of ancient store
	valueRange           iRange              // value range within the global value section
	valueInternalOffsets map[string]iRange   // value offset within the single trie data, keyed by node path
}
|
|
|
|
func newSingleTrienodeHistoryReader(id uint64, reader ethdb.AncientReader, keyRange iRange, valueRange iRange) (*singleTrienodeHistoryReader, error) {
|
|
keyData, err := rawdb.ReadTrienodeHistoryKeySection(reader, id, uint64(keyRange.start), uint64(keyRange.len()))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
valueOffsets := make(map[string]iRange)
|
|
_, err = decodeSingle(keyData, func(key []byte, start int, limit int) error {
|
|
valueOffsets[string(key)] = iRange{
|
|
start: uint32(start),
|
|
limit: uint32(limit),
|
|
}
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &singleTrienodeHistoryReader{
|
|
id: id,
|
|
reader: reader,
|
|
valueRange: valueRange,
|
|
valueInternalOffsets: valueOffsets,
|
|
}, nil
|
|
}
|
|
|
|
// read retrieves the trie node data with the provided node path.
|
|
func (sr *singleTrienodeHistoryReader) read(path string) ([]byte, error) {
|
|
offset, exists := sr.valueInternalOffsets[path]
|
|
if !exists {
|
|
return nil, fmt.Errorf("trienode %v not found", []byte(path))
|
|
}
|
|
return rawdb.ReadTrienodeHistoryValueSection(sr.reader, sr.id, uint64(sr.valueRange.start+offset.start), uint64(offset.len()))
|
|
}
|
|
|
|
// trienodeHistoryReader provides read access to node data in the trie node history.
// It resolves data from the underlying ancient store only when needed, minimizing
// I/O overhead.
type trienodeHistoryReader struct {
	id        uint64                                       // ID of the associated trienode history
	reader    ethdb.AncientReader                          // Database reader of ancient store
	keyRanges map[common.Hash]iRange                       // Key ranges identifying trie chunks
	valRanges map[common.Hash]iRange                       // Value ranges identifying trie chunks
	iReaders  map[common.Hash]*singleTrienodeHistoryReader // readers for each individual trie chunk, built lazily
}
|
|
|
|
// newTrienodeHistoryReader constructs the reader for specific trienode history.
|
|
func newTrienodeHistoryReader(id uint64, reader ethdb.AncientReader) (*trienodeHistoryReader, error) {
|
|
r := &trienodeHistoryReader{
|
|
id: id,
|
|
reader: reader,
|
|
keyRanges: make(map[common.Hash]iRange),
|
|
valRanges: make(map[common.Hash]iRange),
|
|
iReaders: make(map[common.Hash]*singleTrienodeHistoryReader),
|
|
}
|
|
if err := r.decodeHeader(); err != nil {
|
|
return nil, err
|
|
}
|
|
return r, nil
|
|
}
|
|
|
|
// decodeHeader decodes the header section of trienode history.
|
|
func (r *trienodeHistoryReader) decodeHeader() error {
|
|
header, err := rawdb.ReadTrienodeHistoryHeader(r.reader, r.id)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
_, owners, keyOffsets, valOffsets, err := decodeHeader(header)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
for i, owner := range owners {
|
|
// Decode the key range for this trie chunk
|
|
var keyStart uint32
|
|
if i != 0 {
|
|
keyStart = keyOffsets[i-1]
|
|
}
|
|
r.keyRanges[owner] = iRange{
|
|
start: keyStart,
|
|
limit: keyOffsets[i],
|
|
}
|
|
|
|
// Decode the value range for this trie chunk
|
|
var valStart uint32
|
|
if i != 0 {
|
|
valStart = valOffsets[i-1]
|
|
}
|
|
r.valRanges[owner] = iRange{
|
|
start: valStart,
|
|
limit: valOffsets[i],
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// read retrieves the trie node data with the provided TrieID and node path.
|
|
func (r *trienodeHistoryReader) read(owner common.Hash, path string) ([]byte, error) {
|
|
ir, ok := r.iReaders[owner]
|
|
if !ok {
|
|
keyRange, exists := r.keyRanges[owner]
|
|
if !exists {
|
|
return nil, fmt.Errorf("trie %x is unknown", owner)
|
|
}
|
|
valRange, exists := r.valRanges[owner]
|
|
if !exists {
|
|
return nil, fmt.Errorf("trie %x is unknown", owner)
|
|
}
|
|
var err error
|
|
ir, err = newSingleTrienodeHistoryReader(r.id, r.reader, keyRange, valRange)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
r.iReaders[owner] = ir
|
|
}
|
|
return ir.read(path)
|
|
}
|
|
|
|
// writeTrienodeHistory persists the trienode history associated with the given diff layer.
// The history is keyed by the layer's state ID and carries the node values as
// they were *before* the state transition (dl.nodes.nodeOrigin).
// nolint:unused
func writeTrienodeHistory(writer ethdb.AncientWriter, dl *diffLayer) error {
	start := time.Now()
	h := newTrienodeHistory(dl.rootHash(), dl.parent.rootHash(), dl.block, dl.nodes.nodeOrigin)
	header, keySection, valueSection, err := h.encode()
	if err != nil {
		return err
	}
	// Write history data into three freezer tables respectively.
	if err := rawdb.WriteTrienodeHistory(writer, dl.stateID(), header, keySection, valueSection); err != nil {
		return err
	}
	trienodeHistoryDataBytesMeter.Mark(int64(len(valueSection)))
	trienodeHistoryIndexBytesMeter.Mark(int64(len(header) + len(keySection)))
	trienodeHistoryBuildTimeMeter.UpdateSince(start)

	log.Debug(
		"Stored trienode history", "id", dl.stateID(), "block", dl.block,
		"header", common.StorageSize(len(header)),
		"keySection", common.StorageSize(len(keySection)),
		"valueSection", common.StorageSize(len(valueSection)),
		"elapsed", common.PrettyDuration(time.Since(start)),
	)
	return nil
}
|
|
|
|
// readTrienodeMetadata resolves the metadata of the specified trienode history.
|
|
// nolint:unused
|
|
func readTrienodeMetadata(reader ethdb.AncientReader, id uint64) (*trienodeMetadata, error) {
|
|
header, err := rawdb.ReadTrienodeHistoryHeader(reader, id)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
metadata, _, _, _, err := decodeHeader(header)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return metadata, nil
|
|
}
|
|
|
|
// readTrienodeHistory resolves a single trienode history object with specific id.
|
|
func readTrienodeHistory(reader ethdb.AncientReader, id uint64) (*trienodeHistory, error) {
|
|
header, keySection, valueSection, err := rawdb.ReadTrienodeHistory(reader, id)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var h trienodeHistory
|
|
if err := h.decode(header, keySection, valueSection); err != nil {
|
|
return nil, err
|
|
}
|
|
return &h, nil
|
|
}
|
|
|
|
// readTrienodeHistories resolves a list of trienode histories with the specific range.
|
|
func readTrienodeHistories(reader ethdb.AncientReader, start uint64, count uint64) ([]history, error) {
|
|
headers, keySections, valueSections, err := rawdb.ReadTrienodeHistoryList(reader, start, count)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var res []history
|
|
for i, header := range headers {
|
|
var h trienodeHistory
|
|
if err := h.decode(header, keySections[i], valueSections[i]); err != nil {
|
|
return nil, err
|
|
}
|
|
res = append(res, &h)
|
|
}
|
|
return res, nil
|
|
}
|