// Copyright 2025 The go-ethereum Authors // This file is part of the go-ethereum library. // // The go-ethereum library is free software: you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // The go-ethereum library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public License // along with the go-ethereum library. If not, see . package pathdb import ( "encoding/binary" "fmt" "math/bits" "slices" "strings" ) // commonPrefixLen returns the length of the common prefix shared by a and b. func commonPrefixLen(a, b []byte) int { n := min(len(a), len(b)) for i := range n { if a[i] != b[i] { return i } } return n } // findLeafPaths scans a lexicographically sorted list of paths and returns // the subset of paths that represent leaves. // // A path is considered a leaf if: // - it is the last element in the list, or // - the next path does not have the current path as its prefix. // // In other words, a leaf is a path that has no children extending it. // // Example: // // Input: ["a", "ab", "abc", "b", "ba"] // Output: ["abc", "ba"] // // The input must be sorted; otherwise the result is undefined. func findLeafPaths(paths []string) []string { var leaves []string for i := 0; i < len(paths); i++ { if i == len(paths)-1 || !strings.HasPrefix(paths[i+1], paths[i]) { leaves = append(leaves, paths[i]) } } return leaves } // hexPathNodeID computes a numeric node ID from the given path. The path is // interpreted as a sequence of base-16 digits, where each byte of the input // is treated as one hexadecimal digit in a big-endian number. // // The resulting node ID is constructed as: // // ID = 1 + 16 + 16^2 + ... + 16^(n-1) + value // // where n is the number of bytes in the path, and `value` is the base-16 // interpretation of the byte sequence. // // The offset (1 + 16 + 16^2 + ... + 16^(n-1)) ensures that all IDs of shorter // paths occupy a lower numeric range, preserving lexicographic ordering between // differently-length paths. // // The numeric node ID is represented by the uint16 with the assumption the length // of path won't be greater than 3. func hexPathNodeID(path string) uint16 { var ( offset = uint16(0) pow = uint16(1) value = uint16(0) bytes = []byte(path) ) for i := 0; i < len(bytes); i++ { offset += pow pow *= 16 } for i := 0; i < len(bytes); i++ { value = value*16 + uint16(bytes[i]) } return offset + value } // bitmapSize computes the number of bytes required for the marker bitmap // corresponding to the remaining portion of a path after a cut point. // The marker is a bitmap where each bit represents the presence of a // possible element in the remaining path segment. func bitmapSize(levels int) int { // Compute: total = 1 + 16 + 16^2 + ... + 16^(segLen-1) var ( bits = 0 pow = 1 ) for i := 0; i < levels; i++ { bits += pow pow *= 16 } // A small adjustment is applied to exclude the root element of this path // segment, since any existing element would already imply the mutation of // the root element. This trick can save us 1 byte for each bitmap which is // non-trivial. bits -= 1 return bits / 8 } // indexScheme defines how trie nodes are split into chunks and index them // at chunk level. // // skipRoot indicates whether the root node should be excluded from indexing. // cutPoints specifies the key length of chunks (in nibbles) extracted from // each path. type indexScheme struct { // skipRoot indicates whether the root node should be excluded from indexing. // In the account trie, the root is mutated on every state transition, so // indexing it provides no value. skipRoot bool // cutPoints defines the key lengths of chunks at different positions. // A single trie node path may span multiple chunks vertically. cutPoints []int // bitmaps specifies the required bitmap size for each chunk. The key is the // chunk key length, and the value is the corresponding bitmap size. bitmaps map[int]int } var ( // Account trie is split into chunks like this: // // - root node is excluded from indexing // - nodes at level1 to level2 are grouped as 16 chunks // - all other nodes are grouped 3 levels per chunk // // Level1 [0] ... [f] 16 chunks // Level3 [000] ... [fff] 4096 chunks // Level6 [000000] ... [fffffff] 16777216 chunks // // For the chunks at level1, there are 17 nodes per chunk. // // chunk-level 0 [ 0 ] 1 node // chunk-level 1 [ 1 ] … [ 16 ] 16 nodes // // For the non-level1 chunks, there are 273 nodes per chunk, // regardless of the chunk's depth in the trie. // // chunk-level 0 [ 0 ] 1 node // chunk-level 1 [ 1 ] … [ 16 ] 16 nodes // chunk-level 2 [ 17 ] … … [ 272 ] 256 nodes accountIndexScheme = newIndexScheme(true) // Storage trie is split into chunks like this: (3 levels per chunk) // // Level0 [ ROOT ] 1 chunk // Level3 [000] ... [fff] 4096 chunks // Level6 [000000] ... [fffffff] 16777216 chunks // // Within each chunk, there are 273 nodes in total, regardless of // the chunk's depth in the trie. // // chunk-level 0 [ 0 ] 1 node // chunk-level 1 [ 1 ] … [ 16 ] 16 nodes // chunk-level 2 [ 17 ] … … [ 272 ] 256 nodes storageIndexScheme = newIndexScheme(false) ) // newIndexScheme initializes the index scheme. func newIndexScheme(skipRoot bool) *indexScheme { var ( cuts []int bitmaps = make(map[int]int) ) for v := 0; v <= 64; v += 3 { var ( levels int length int ) if v == 0 && skipRoot { length = 1 levels = 2 } else { length = v levels = 3 } cuts = append(cuts, length) bitmaps[length] = bitmapSize(levels) } return &indexScheme{ skipRoot: skipRoot, cutPoints: cuts, bitmaps: bitmaps, } } // getBitmapSize returns the required bytes for bitmap with chunk's position. func (s *indexScheme) getBitmapSize(pathLen int) int { return s.bitmaps[pathLen] } // chunkSpan returns how many chunks should be spanned with the given path. func (s *indexScheme) chunkSpan(length int) int { var n int for _, cut := range s.cutPoints { if length >= cut { n++ continue } } return n } // splitPath applies the indexScheme to the given path and returns two lists: // // - chunkIDs: the progressive chunk IDs cuts defined by the scheme // - innerIDs: the computed node ID for the path segment following each cut // // The scheme defines a set of cut points that partition the path. For each cut: // // - chunkIDs[i] is path[:cutPoints[i]] // - innerIDs[i] is the node ID of the segment path[cutPoints[i] : nextCut-1] func (s *indexScheme) splitPath(path string) ([]string, []uint16) { // Special case: the root node of the account trie is mutated in every // state transition, so its mutation records can be ignored. n := len(path) if n == 0 && s.skipRoot { return nil, nil } var ( // Determine how many chunks are spanned by the path chunks = s.chunkSpan(n) chunkIDs = make([]string, 0, chunks) nodeIDs = make([]uint16, 0, chunks) ) for i := 0; i < chunks; i++ { position := s.cutPoints[i] chunkIDs = append(chunkIDs, path[:position]) var limit int if i != chunks-1 { limit = s.cutPoints[i+1] - 1 } else { limit = len(path) } nodeIDs = append(nodeIDs, hexPathNodeID(path[position:limit])) } return chunkIDs, nodeIDs } // splitPathLast returns the path prefix of the deepest chunk spanned by the // given path, along with its corresponding internal node ID. If the path // spans no chunks, it returns an empty prefix and 0. // // nolint:unused func (s *indexScheme) splitPathLast(path string) (string, uint16) { chunkIDs, nodeIDs := s.splitPath(path) if len(chunkIDs) == 0 { return "", 0 } n := len(chunkIDs) return chunkIDs[n-1], nodeIDs[n-1] } // encodeIDs sorts the given list of uint16 IDs and encodes them into a // compact byte slice using variable-length unsigned integer encoding. func encodeIDs(ids []uint16) []byte { slices.Sort(ids) buf := make([]byte, 0, len(ids)) for _, id := range ids { buf = binary.AppendUvarint(buf, uint64(id)) } return buf } // decodeIDs decodes a sequence of variable-length encoded uint16 IDs from the // given byte slice and returns them as a set. // // Returns an error if the input buffer does not contain a complete Uvarint value. func decodeIDs(buf []byte) ([]uint16, error) { var res []uint16 for len(buf) > 0 { id, n := binary.Uvarint(buf) if n <= 0 { return nil, fmt.Errorf("too short for decoding node id, %v", buf) } buf = buf[n:] res = append(res, uint16(id)) } return res, nil } // isAncestor reports whether node x is the ancestor of node y. func isAncestor(x, y uint16) bool { for y > x { y = (y - 1) / 16 // parentID(y) = (y - 1) / 16 if y == x { return true } } return false } // isBitSet reports whether the bit at `index` in the byte slice `b` is set. func isBitSet(b []byte, index int) bool { return b[index/8]&(1<<(7-index%8)) != 0 } // setBit sets the bit at `index` in the byte slice `b` to 1. func setBit(b []byte, index int) { b[index/8] |= 1 << (7 - index%8) } // bitPosTwoBytes returns the positions of set bits in a 2-byte bitmap. // // The bitmap is interpreted as a big-endian uint16. Bit positions are // numbered from 0 to 15, where position 0 corresponds to the most // significant bit of b[0], and position 15 corresponds to the least // significant bit of b[1]. func bitPosTwoBytes(b []byte) []int { if len(b) != 2 { panic("expect 2 bytes") } var ( pos []int mask = binary.BigEndian.Uint16(b) ) for mask != 0 { p := bits.LeadingZeros16(mask) pos = append(pos, p) mask &^= 1 << (15 - p) } return pos }