go-ethereum/triedb/pathdb/history_trienode_utils.go
rjl493456442 588dd94aad
triedb/pathdb: implement trienode history indexing scheme (#33551)
This PR implements the indexing scheme for trie node history. Check
https://github.com/ethereum/go-ethereum/pull/33399 for more details
2026-01-17 20:28:37 +08:00

344 lines
10 KiB
Go

// Copyright 2025 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package pathdb
import (
"encoding/binary"
"fmt"
"math/bits"
"slices"
"strings"
)
// commonPrefixLen reports how many leading bytes a and b have in common.
// The result is 0 if either slice is empty and never exceeds the length of
// the shorter slice.
func commonPrefixLen(a, b []byte) int {
	limit := min(len(a), len(b))
	matched := 0
	// Advance while both slices still have a byte at this position and the
	// bytes agree.
	for matched < limit && a[matched] == b[matched] {
		matched++
	}
	return matched
}
// findLeafPaths walks a lexicographically sorted list of paths and collects
// the ones that are leaves, i.e. paths that no other path in the list extends.
//
// Because the list is sorted, any extension of a path directly follows it, so
// only the immediate successor has to be inspected. A path is a leaf if:
//   - it is the last element of the list, or
//   - its successor does not start with it.
//
// Example:
//
//	Input:  ["a", "ab", "abc", "b", "ba"]
//	Output: ["abc", "ba"]
//
// The input must be sorted; otherwise the result is undefined. A nil slice is
// returned when the input is empty.
func findLeafPaths(paths []string) []string {
	var leaves []string
	last := len(paths) - 1
	for idx, cur := range paths {
		if idx < last && strings.HasPrefix(paths[idx+1], cur) {
			// The successor extends cur, so cur is an interior path.
			continue
		}
		leaves = append(leaves, cur)
	}
	return leaves
}
// hexPathNodeID maps a path to a numeric node ID.
//
// Every byte of path is taken as one base-16 digit, most significant digit
// first, which yields `value`. The bytes are used as-is and are not
// range-checked. The ID is then
//
//	ID = (1 + 16 + 16^2 + ... + 16^(n-1)) + value
//
// where n = len(path). The leading sum is the number of paths shorter than n
// digits, so every shorter path receives a smaller ID than any longer one,
// and paths of equal length are ordered by value. The empty path maps to 0.
//
// The ID is a uint16, which relies on the assumption that the path is no
// longer than 3 digits (largest ID 4368); the arithmetic wraps around
// silently for longer inputs.
func hexPathNodeID(path string) uint16 {
	var (
		base   uint16     // IDs taken by all shorter paths
		weight uint16 = 1 // 16^i, the number of paths with exactly i digits
		digits uint16     // base-16 value of the digits read so far
	)
	for i := 0; i < len(path); i++ {
		base += weight
		weight *= 16
		digits = digits*16 + uint16(path[i])
	}
	return base + digits
}
// bitmapSize returns the number of bytes of the marker bitmap for a path
// segment that spans the given number of levels. The marker is a bitmap in
// which each bit represents the presence of one possible element of the
// segment.
func bitmapSize(levels int) int {
	// Count all positions of a full 16-ary tree with `levels` levels:
	// 1 + 16 + 16^2 + ... + 16^(levels-1).
	var (
		total = 0
		width = 1 // number of positions on the current level
	)
	for lvl := 0; lvl < levels; lvl++ {
		total += width
		width *= 16
	}
	// The root element of the segment gets no bit of its own: any other
	// existing element already implies that the root was mutated. Dropping it
	// saves one byte per bitmap (e.g. 272 bits = 34 bytes for three levels).
	return (total - 1) / 8
}
// indexScheme defines how trie nodes are split into chunks and how they are
// indexed at chunk level.
type indexScheme struct {
// skipRoot indicates whether the root node should be excluded from indexing.
// In the account trie, the root is mutated on every state transition, so
// indexing it provides no value.
skipRoot bool
// cutPoints defines the key lengths of chunks (in nibbles) at different
// positions, i.e. the offsets at which a node path is cut. A single trie
// node path may span multiple chunks vertically.
cutPoints []int
// bitmaps specifies the required bitmap size (in bytes) for each chunk. The
// key is the chunk key length, and the value is the corresponding bitmap size.
bitmaps map[int]int
}
var (
// accountIndexScheme is the index scheme of the account trie, which is
// split into chunks like this:
//
// - root node is excluded from indexing
// - nodes at level1 to level2 are grouped as 16 chunks
// - all other nodes are grouped 3 levels per chunk
//
//   Level1   [0]      ... [f]         16 chunks
//   Level3   [000]    ... [fff]       4096 chunks
//   Level6   [000000] ... [ffffff]    16777216 chunks
//
// For the chunks at level1, there are 17 nodes per chunk.
//
//   chunk-level 0   [ 0 ]                1 node
//   chunk-level 1   [ 1 ] … [ 16 ]       16 nodes
//
// For the non-level1 chunks, there are 273 nodes per chunk,
// regardless of the chunk's depth in the trie.
//
//   chunk-level 0   [ 0 ]                1 node
//   chunk-level 1   [ 1 ] … [ 16 ]       16 nodes
//   chunk-level 2   [ 17 ] … … [ 272 ]   256 nodes
accountIndexScheme = newIndexScheme(true)
// storageIndexScheme is the index scheme of the storage tries, which are
// split into chunks like this: (3 levels per chunk)
//
//   Level0   [ ROOT ]                 1 chunk
//   Level3   [000]    ... [fff]       4096 chunks
//   Level6   [000000] ... [ffffff]    16777216 chunks
//
// Within each chunk, there are 273 nodes in total, regardless of
// the chunk's depth in the trie.
//
//   chunk-level 0   [ 0 ]                1 node
//   chunk-level 1   [ 1 ] … [ 16 ]       16 nodes
//   chunk-level 2   [ 17 ] … … [ 272 ]   256 nodes
storageIndexScheme = newIndexScheme(false)
)
// newIndexScheme builds an index scheme with one cut point at every third
// nibble, from 0 up to and including 63, where each chunk covers three levels.
//
// If skipRoot is set, the first cut point is placed at 1 instead of 0 and its
// chunk covers only two levels, so that the root node falls outside of every
// chunk.
func newIndexScheme(skipRoot bool) *indexScheme {
	scheme := &indexScheme{
		skipRoot: skipRoot,
		bitmaps:  make(map[int]int),
	}
	for cut := 0; cut <= 64; cut += 3 {
		// Regular chunk: keyed by the first `cut` nibbles, three levels deep.
		length, levels := cut, 3
		if cut == 0 && skipRoot {
			// Topmost chunk without the root: keyed by one nibble, two levels.
			length, levels = 1, 2
		}
		scheme.cutPoints = append(scheme.cutPoints, length)
		scheme.bitmaps[length] = bitmapSize(levels)
	}
	return scheme
}
// getBitmapSize returns the bitmap size in bytes registered for the chunk
// whose key length is pathLen. It returns 0 if the scheme has no chunk with
// that key length.
func (s *indexScheme) getBitmapSize(pathLen int) int {
	if size, ok := s.bitmaps[pathLen]; ok {
		return size
	}
	return 0
}
// chunkSpan returns how many chunks a path of the given length spans, which
// is the number of cut points that are not greater than length.
func (s *indexScheme) chunkSpan(length int) int {
	count := 0
	for i := range s.cutPoints {
		if s.cutPoints[i] <= length {
			count++
		}
	}
	return count
}
// splitPath applies the indexScheme to the given path and returns two lists
// of equal length, holding one entry per chunk spanned by the path, ordered
// from the shallowest chunk to the deepest one:
//
//   - chunk IDs: the path prefix identifying the chunk, path[:cutPoints[i]]
//   - node IDs: the ID (see hexPathNodeID) of the node inside that chunk,
//     computed from path[cutPoints[i]:end], where end is cutPoints[i+1]-1
//     for every chunk but the last one and len(path) for the last one
//
// Both results are nil for the empty path if the scheme skips the root.
func (s *indexScheme) splitPath(path string) ([]string, []uint16) {
	// Special case: the root node of the account trie is mutated in every
	// state transition, so its mutation records can be ignored.
	if s.skipRoot && len(path) == 0 {
		return nil, nil
	}
	var (
		span     = s.chunkSpan(len(path)) // number of chunks spanned by the path
		prefixes = make([]string, span)
		ids      = make([]uint16, span)
	)
	for i := range prefixes {
		start, end := s.cutPoints[i], len(path)
		if i+1 < span {
			// The path continues into a deeper chunk. The segment kept for
			// this chunk stops one nibble short of the next cut point.
			end = s.cutPoints[i+1] - 1
		}
		prefixes[i] = path[:start]
		ids[i] = hexPathNodeID(path[start:end])
	}
	return prefixes, ids
}
// splitPathLast splits the given path with splitPath and returns only the
// entry of the deepest chunk: its path prefix and the ID of the node inside
// it. If the path spans no chunks, it returns an empty prefix and 0.
//
// nolint:unused
func (s *indexScheme) splitPathLast(path string) (string, uint16) {
	chunkIDs, nodeIDs := s.splitPath(path)
	last := len(chunkIDs) - 1
	if last < 0 {
		return "", 0
	}
	return chunkIDs[last], nodeIDs[last]
}
// encodeIDs sorts the given IDs in ascending order and serializes them into a
// compact byte slice, each ID encoded as a variable-length unsigned integer.
//
// Note that the sort happens in place, so the caller's slice is reordered.
func encodeIDs(ids []uint16) []byte {
	slices.Sort(ids)

	// One byte per ID is only the lower bound; IDs of 128 and above take more
	// space and make the buffer grow.
	out := make([]byte, 0, len(ids))
	for i := range ids {
		out = binary.AppendUvarint(out, uint64(ids[i]))
	}
	return out
}
// decodeIDs decodes a sequence of variable-length encoded uint16 IDs from the
// given byte slice and returns them in the order in which they were encoded.
// A nil slice is returned for an empty buffer.
//
// An error is returned if the buffer ends in the middle of a value, if a value
// overflows 64 bits, or if a decoded value does not fit into a uint16. The
// last case was previously truncated silently, which would turn corrupted
// input into valid looking but wrong IDs.
func decodeIDs(buf []byte) ([]uint16, error) {
	const maxID = 1<<16 - 1 // largest value representable by uint16

	var res []uint16
	for len(buf) > 0 {
		id, n := binary.Uvarint(buf)
		switch {
		case n == 0:
			// The buffer ended before the value was complete.
			return nil, fmt.Errorf("too short for decoding node id, %v", buf)
		case n < 0:
			// The value does not even fit into 64 bits.
			return nil, fmt.Errorf("node id overflows 64 bits, %v", buf)
		case id > maxID:
			return nil, fmt.Errorf("node id %d out of uint16 range", id)
		}
		buf = buf[n:]
		res = append(res, uint16(id))
	}
	return res, nil
}
// isAncestor reports whether node x is an ancestor of node y. The parent of a
// node with ID y is the node with ID (y - 1) / 16. A node is not regarded as
// its own ancestor.
func isAncestor(x, y uint16) bool {
	node := y
	// Climb from y towards the root. Parents always have smaller IDs, so the
	// walk can stop as soon as the current node is no longer above x.
	for node > x {
		parent := (node - 1) >> 4 // parentID(node) = (node - 1) / 16
		if parent == x {
			return true
		}
		node = parent
	}
	return false
}
// isBitSet reports whether the bit at `index` in the byte slice `b` is set.
// Bits are numbered starting at the most significant bit of b[0], so bit 8 is
// the most significant bit of b[1].
func isBitSet(b []byte, index int) bool {
	mask := byte(1) << (7 - index%8)
	return b[index/8]&mask != 0
}
// setBit sets the bit at `index` in the byte slice `b` to 1, leaving all other
// bits untouched. Bits are numbered starting at the most significant bit of
// b[0], so bit 8 is the most significant bit of b[1].
func setBit(b []byte, index int) {
	mask := byte(1) << (7 - index%8)
	b[index/8] |= mask
}
// bitPosTwoBytes returns the positions of all set bits in a 2-byte bitmap, in
// ascending order. The result is nil if no bit is set.
//
// The bitmap is interpreted as a big-endian uint16. Bit positions are
// numbered from 0 to 15, where position 0 corresponds to the most
// significant bit of b[0], and position 15 corresponds to the least
// significant bit of b[1].
//
// It panics if b is not exactly two bytes long.
func bitPosTwoBytes(b []byte) []int {
	if len(b) != 2 {
		panic("expect 2 bytes")
	}
	var positions []int
	for word := binary.BigEndian.Uint16(b); word != 0; {
		// The number of leading zeros is the position of the topmost set bit.
		lead := bits.LeadingZeros16(word)
		positions = append(positions, lead)

		// Clear that bit so that the next round finds the following one.
		word ^= 0x8000 >> lead
	}
	return positions
}