trie: group 2^N binary trie nodes in serialization (#34794)
Some checks are pending
/ Linux Build (push) Waiting to run
/ Linux Build (arm) (push) Waiting to run
/ Keeper Build (push) Waiting to run
/ Windows Build (push) Waiting to run
/ Docker Image (push) Waiting to run

This PR addresses one of the biggest performance issue with binary
tries: storing each internal node individually bloats the index, the
disk, and triggers a lot of write amplifications. To fix this issue,
this PR serializes groups of nodes together.

Because we are still looking for the ideal group size, the "depth" of
the group tree is made a parameter, but that will be removed in the
future, once the perfect size is known.


This is a rebase of #33658

---------

Co-authored-by: Copilot <copilot@github.com>
This commit is contained in:
Guillaume Ballet 2026-05-01 15:28:19 +02:00 committed by GitHub
parent 68646229a0
commit a15778c52f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 402 additions and 136 deletions

View file

@ -546,7 +546,7 @@ func BinKeys(ctx *cli.Context) error {
db := triedb.NewDatabase(rawdb.NewMemoryDatabase(), triedb.UBTDefaults)
defer db.Close()
bt, err := genBinTrieFromAlloc(alloc, db)
bt, err := genBinTrieFromAlloc(alloc, db, triedb.UBTDefaults.BinTrieGroupDepth)
if err != nil {
return fmt.Errorf("error generating bt: %w", err)
}
@ -590,7 +590,7 @@ func BinTrieRoot(ctx *cli.Context) error {
db := triedb.NewDatabase(rawdb.NewMemoryDatabase(), triedb.UBTDefaults)
defer db.Close()
bt, err := genBinTrieFromAlloc(alloc, db)
bt, err := genBinTrieFromAlloc(alloc, db, triedb.UBTDefaults.BinTrieGroupDepth)
if err != nil {
return fmt.Errorf("error generating bt: %w", err)
}
@ -600,8 +600,8 @@ func BinTrieRoot(ctx *cli.Context) error {
}
// TODO(@CPerezz): Should this go to `bintrie` module?
func genBinTrieFromAlloc(alloc core.GenesisAlloc, db database.NodeDatabase) (*bintrie.BinaryTrie, error) {
bt, err := bintrie.NewBinaryTrie(types.EmptyBinaryHash, db)
func genBinTrieFromAlloc(alloc core.GenesisAlloc, db database.NodeDatabase, groupDepth int) (*bintrie.BinaryTrie, error) {
bt, err := bintrie.NewBinaryTrie(types.EmptyBinaryHash, db, groupDepth)
if err != nil {
return nil, err
}

View file

@ -151,7 +151,7 @@ func convertToBinaryTrie(ctx *cli.Context) error {
})
defer destTriedb.Close()
binTrie, err := bintrie.NewBinaryTrie(types.EmptyBinaryHash, destTriedb)
binTrie, err := bintrie.NewBinaryTrie(types.EmptyBinaryHash, destTriedb, ctx.Int(utils.BinTrieGroupDepthFlag.Name))
if err != nil {
return fmt.Errorf("failed to create binary trie: %w", err)
}
@ -319,7 +319,7 @@ func commitBinaryTrie(bt *bintrie.BinaryTrie, currentRoot common.Hash, destDB *t
runtime.GC()
debug.FreeOSMemory()
bt, err := bintrie.NewBinaryTrie(newRoot, destDB)
bt, err := bintrie.NewBinaryTrie(newRoot, destDB, bt.GroupDepth())
if err != nil {
return nil, common.Hash{}, fmt.Errorf("failed to reload binary trie: %w", err)
}

View file

@ -87,7 +87,7 @@ func TestBintrieConvert(t *testing.T) {
})
defer destTriedb.Close()
bt, err := bintrie.NewBinaryTrie(types.EmptyBinaryHash, destTriedb)
bt, err := bintrie.NewBinaryTrie(types.EmptyBinaryHash, destTriedb, 8)
if err != nil {
t.Fatalf("failed to create binary trie: %v", err)
}
@ -98,7 +98,7 @@ func TestBintrieConvert(t *testing.T) {
}
t.Logf("Binary trie root: %x", currentRoot)
bt2, err := bintrie.NewBinaryTrie(currentRoot, destTriedb)
bt2, err := bintrie.NewBinaryTrie(currentRoot, destTriedb, 8)
if err != nil {
t.Fatalf("failed to reload binary trie: %v", err)
}
@ -194,7 +194,7 @@ func TestBintrieConvertDeleteSource(t *testing.T) {
PathDB: pathdb.Defaults,
})
bt, err := bintrie.NewBinaryTrie(types.EmptyBinaryHash, destTriedb)
bt, err := bintrie.NewBinaryTrie(types.EmptyBinaryHash, destTriedb, 8)
if err != nil {
t.Fatalf("failed to create binary trie: %v", err)
}
@ -209,7 +209,7 @@ func TestBintrieConvertDeleteSource(t *testing.T) {
}
srcTriedb2.Close()
bt2, err := bintrie.NewBinaryTrie(newRoot, destTriedb)
bt2, err := bintrie.NewBinaryTrie(newRoot, destTriedb, 8)
if err != nil {
t.Fatalf("failed to reload binary trie after deletion: %v", err)
}

View file

@ -95,6 +95,7 @@ var (
utils.StateHistoryFlag,
utils.TrienodeHistoryFlag,
utils.TrienodeHistoryFullValueCheckpointFlag,
utils.BinTrieGroupDepthFlag,
utils.LightKDFFlag,
utils.EthRequiredBlocksFlag,
utils.LegacyWhitelistFlag, // deprecated

View file

@ -297,6 +297,12 @@ var (
Value: ethconfig.Defaults.EnableStateSizeTracking,
Category: flags.StateCategory,
}
BinTrieGroupDepthFlag = &cli.IntFlag{
Name: "bintrie.groupdepth",
Usage: "Number of levels per serialized group in binary trie (1-8, default 5). Lower values create smaller groups with more nodes.",
Value: 5,
Category: flags.StateCategory,
}
StateHistoryFlag = &cli.Uint64Flag{
Name: "history.state",
Usage: "Number of recent blocks to retain state history for, only relevant in state.scheme=path (default = 90,000 blocks, 0 = entire chain)",
@ -1817,6 +1823,9 @@ func SetEthConfig(ctx *cli.Context, stack *node.Node, cfg *ethconfig.Config) {
if ctx.IsSet(TrienodeHistoryFullValueCheckpointFlag.Name) {
cfg.NodeFullValueCheckpoint = uint32(ctx.Uint(TrienodeHistoryFullValueCheckpointFlag.Name))
}
if ctx.IsSet(BinTrieGroupDepthFlag.Name) {
cfg.BinTrieGroupDepth = ctx.Int(BinTrieGroupDepthFlag.Name)
}
if ctx.IsSet(StateSchemeFlag.Name) {
cfg.StateScheme = ctx.String(StateSchemeFlag.Name)
}
@ -2433,6 +2442,7 @@ func MakeChain(ctx *cli.Context, stack *node.Node, readonly bool) (*core.BlockCh
StateHistory: ctx.Uint64(StateHistoryFlag.Name),
TrienodeHistory: ctx.Int64(TrienodeHistoryFlag.Name),
NodeFullValueCheckpoint: uint32(ctx.Uint(TrienodeHistoryFullValueCheckpointFlag.Name)),
BinTrieGroupDepth: ctx.Int(BinTrieGroupDepthFlag.Name),
// Disable transaction indexing/unindexing.
TxLookupLimit: -1,

View file

@ -92,6 +92,7 @@ func TestProcessUBT(t *testing.T) {
// genesis := gspec.MustCommit(bcdb, triedb)
options := DefaultConfig().WithStateScheme(rawdb.PathScheme)
options.SnapshotLimit = 0
options.BinTrieGroupDepth = triedb.DefaultBinTrieGroupDepth
blockchain, _ := NewBlockChain(bcdb, gspec, beacon.New(ethash.NewFaker()), options)
defer blockchain.Stop()
@ -218,6 +219,7 @@ func TestProcessParentBlockHash(t *testing.T) {
t.Run("UBT", func(t *testing.T) {
db := rawdb.NewMemoryDatabase()
cacheConfig := DefaultConfig().WithStateScheme(rawdb.PathScheme)
cacheConfig.BinTrieGroupDepth = triedb.DefaultBinTrieGroupDepth
cacheConfig.SnapshotLimit = 0
triedb := triedb.NewDatabase(db, cacheConfig.triedbConfig(true))
statedb, _ := state.New(types.EmptyBinaryHash, state.NewDatabase(triedb, nil))

View file

@ -170,9 +170,10 @@ type BlockChainConfig struct {
TrieNoAsyncFlush bool // Whether the asynchronous buffer flushing is disallowed
TrieJournalDirectory string // Directory path to the journal used for persisting trie data across node restarts
Preimages bool // Whether to store preimage of trie key to the disk
StateScheme string // Scheme used to store ethereum states and merkle tree nodes on top
ArchiveMode bool // Whether to enable the archive mode
Preimages bool // Whether to store preimage of trie key to the disk
StateScheme string // Scheme used to store ethereum states and merkle tree nodes on top
ArchiveMode bool // Whether to enable the archive mode
BinTrieGroupDepth int // Number of levels per serialized group in binary trie (1-8)
// Number of blocks from the chain head for which state histories are retained.
// If set to 0, all state histories across the entire chain will be retained;
@ -260,8 +261,9 @@ func (cfg BlockChainConfig) WithNoAsyncFlush(on bool) *BlockChainConfig {
// triedbConfig derives the configures for trie database.
func (cfg *BlockChainConfig) triedbConfig(isUBT bool) *triedb.Config {
config := &triedb.Config{
Preimages: cfg.Preimages,
IsUBT: isUBT,
Preimages: cfg.Preimages,
IsUBT: isUBT,
BinTrieGroupDepth: cfg.BinTrieGroupDepth,
}
if cfg.StateScheme == rawdb.HashScheme {
config.HashDB = &hashdb.Config{

View file

@ -136,8 +136,9 @@ func hashAlloc(ga *types.GenesisAlloc, isUBT bool) (common.Hash, error) {
var config *triedb.Config
if isUBT {
config = &triedb.Config{
PathDB: pathdb.Defaults,
IsUBT: true,
PathDB: pathdb.Defaults,
IsUBT: true,
BinTrieGroupDepth: triedb.UBTDefaults.BinTrieGroupDepth,
}
}
// Create an ephemeral in-memory database for computing hash,

View file

@ -261,9 +261,9 @@ func newDbConfig(scheme string) *triedb.Config {
return &triedb.Config{PathDB: &config}
}
func TestVerkleGenesisCommit(t *testing.T) {
var verkleTime uint64 = 0
verkleConfig := &params.ChainConfig{
func TestBinaryGenesisCommit(t *testing.T) {
var ubtTime uint64 = 0
ubtConfig := &params.ChainConfig{
ChainID: big.NewInt(1),
HomesteadBlock: big.NewInt(0),
DAOForkBlock: nil,
@ -281,11 +281,11 @@ func TestVerkleGenesisCommit(t *testing.T) {
ArrowGlacierBlock: big.NewInt(0),
GrayGlacierBlock: big.NewInt(0),
MergeNetsplitBlock: nil,
ShanghaiTime: &verkleTime,
CancunTime: &verkleTime,
PragueTime: &verkleTime,
OsakaTime: &verkleTime,
UBTTime: &verkleTime,
ShanghaiTime: &ubtTime,
CancunTime: &ubtTime,
PragueTime: &ubtTime,
OsakaTime: &ubtTime,
UBTTime: &ubtTime,
TerminalTotalDifficulty: big.NewInt(0),
EnableUBTAtGenesis: true,
Ethash: nil,
@ -300,8 +300,8 @@ func TestVerkleGenesisCommit(t *testing.T) {
genesis := &Genesis{
BaseFee: big.NewInt(params.InitialBaseFee),
Config: verkleConfig,
Timestamp: verkleTime,
Config: ubtConfig,
Timestamp: ubtTime,
Difficulty: big.NewInt(0),
Alloc: types.GenesisAlloc{
{1}: {Balance: big.NewInt(1), Storage: map[common.Hash]common.Hash{{1}: {1}}},
@ -320,17 +320,18 @@ func TestVerkleGenesisCommit(t *testing.T) {
config.NoAsyncFlush = true
triedb := triedb.NewDatabase(db, &triedb.Config{
IsUBT: true,
PathDB: &config,
IsUBT: true,
PathDB: &config,
BinTrieGroupDepth: triedb.DefaultBinTrieGroupDepth,
})
block := genesis.MustCommit(db, triedb)
if !bytes.Equal(block.Root().Bytes(), expected) {
t.Fatalf("invalid genesis state root, expected %x, got %x", expected, block.Root())
}
// Test that the trie is verkle
// Test that the trie is a unified binary trie
if !triedb.IsUBT() {
t.Fatalf("expected trie to be verkle")
t.Fatalf("expected trie to be a unified binary trie")
}
vdb := rawdb.NewTable(db, string(rawdb.VerklePrefix))
if !rawdb.HasAccountTrieNode(vdb, nil) {

View file

@ -96,7 +96,7 @@ func (db *UBTDatabase) ReadersWithCacheStats(stateRoot common.Hash) (Reader, Rea
// OpenTrie opens the main account trie at a specific root hash.
func (db *UBTDatabase) OpenTrie(root common.Hash) (Trie, error) {
return bintrie.NewBinaryTrie(root, db.triedb)
return bintrie.NewBinaryTrie(root, db.triedb, db.triedb.BinTrieGroupDepth())
}
// OpenStorageTrie opens the storage trie of an account. In binary trie mode,

View file

@ -255,7 +255,7 @@ type ubtTrieReader struct {
// newUBTTrieReader constructs a Unified-binary-trie reader of the specific state.
// An error will be returned if the associated trie specified by root is not existent.
func newUBTTrieReader(root common.Hash, db *triedb.Database) (*ubtTrieReader, error) {
binTrie, binErr := bintrie.NewBinaryTrie(root, db)
binTrie, binErr := bintrie.NewBinaryTrie(root, db, db.BinTrieGroupDepth())
if binErr != nil {
return nil, binErr
}

View file

@ -237,6 +237,7 @@ func New(stack *node.Node, config *ethconfig.Config) (*Ethereum, error) {
StateHistory: config.StateHistory,
TrienodeHistory: config.TrienodeHistory,
NodeFullValueCheckpoint: config.NodeFullValueCheckpoint,
BinTrieGroupDepth: config.BinTrieGroupDepth,
StateScheme: scheme,
HistoryPolicy: histPolicy,
TxLookupLimit: int64(min(config.TransactionHistory, math.MaxInt64)),

View file

@ -35,6 +35,7 @@ import (
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/miner"
"github.com/ethereum/go-ethereum/params"
"github.com/ethereum/go-ethereum/triedb"
"github.com/ethereum/go-ethereum/triedb/pathdb"
)
@ -59,6 +60,7 @@ var Defaults = Config{
StateHistory: pathdb.Defaults.StateHistory,
TrienodeHistory: pathdb.Defaults.TrienodeHistory,
NodeFullValueCheckpoint: pathdb.Defaults.FullValueCheckpoint,
BinTrieGroupDepth: triedb.DefaultBinTrieGroupDepth,
DatabaseCache: 2048,
TrieCleanCache: 614,
TrieDirtyCache: 1024,
@ -125,6 +127,11 @@ type Config struct {
// consistent with persistent state.
StateScheme string `toml:",omitempty"`
// BinTrieGroupDepth is the number of levels per serialized group in binary trie.
// Valid values are 1-8, with 8 being the default (byte-aligned groups).
// Lower values create smaller groups with more nodes.
BinTrieGroupDepth int `toml:",omitempty"`
// RequiredBlocks is a set of block number -> hash mappings which must be in the
// canonical chain of all remote peers. Setting the option makes geth verify the
// presence of these blocks for every new peer connection.

View file

@ -34,6 +34,7 @@ func (c Config) MarshalTOML() (interface{}, error) {
TrienodeHistory int64 `toml:",omitempty"`
NodeFullValueCheckpoint uint32 `toml:",omitempty"`
StateScheme string `toml:",omitempty"`
BinTrieGroupDepth int `toml:",omitempty"`
RequiredBlocks map[uint64]common.Hash `toml:"-"`
SlowBlockThreshold time.Duration `toml:",omitempty"`
SkipBcVersionCheck bool `toml:"-"`
@ -87,6 +88,7 @@ func (c Config) MarshalTOML() (interface{}, error) {
enc.TrienodeHistory = c.TrienodeHistory
enc.NodeFullValueCheckpoint = c.NodeFullValueCheckpoint
enc.StateScheme = c.StateScheme
enc.BinTrieGroupDepth = c.BinTrieGroupDepth
enc.RequiredBlocks = c.RequiredBlocks
enc.SlowBlockThreshold = c.SlowBlockThreshold
enc.SkipBcVersionCheck = c.SkipBcVersionCheck
@ -144,6 +146,7 @@ func (c *Config) UnmarshalTOML(unmarshal func(interface{}) error) error {
TrienodeHistory *int64 `toml:",omitempty"`
NodeFullValueCheckpoint *uint32 `toml:",omitempty"`
StateScheme *string `toml:",omitempty"`
BinTrieGroupDepth *int `toml:",omitempty"`
RequiredBlocks map[uint64]common.Hash `toml:"-"`
SlowBlockThreshold *time.Duration `toml:",omitempty"`
SkipBcVersionCheck *bool `toml:"-"`
@ -234,6 +237,9 @@ func (c *Config) UnmarshalTOML(unmarshal func(interface{}) error) error {
if dec.StateScheme != nil {
c.StateScheme = *dec.StateScheme
}
if dec.BinTrieGroupDepth != nil {
c.BinTrieGroupDepth = *dec.BinTrieGroupDepth
}
if dec.RequiredBlocks != nil {
c.RequiredBlocks = dec.RequiredBlocks
}

View file

@ -27,8 +27,19 @@ const (
NodeTypeBytes = 1 // Size of node type prefix in serialization
HashSize = 32 // Size of a hash in bytes
StemBitmapSize = 32 // Size of the bitmap in a stem node (256 values = 32 bytes)
MaxGroupDepth = 8
)
// bitmapSizeForDepth returns the bitmap size in bytes for a given group depth.
// For depths 1-3, returns 1 byte. For depths 4-8, returns 2^(depth-3) bytes.
func bitmapSizeForDepth(groupDepth int) int {
if groupDepth <= 3 {
return 1
}
return 1 << (groupDepth - 3)
}
const (
nodeTypeStem = iota + 1
nodeTypeInternal

View file

@ -23,8 +23,8 @@ import (
"github.com/ethereum/go-ethereum/common"
)
// TestSerializeDeserializeInternalNode tests flat 65-byte serialization and
// deserialization of InternalNode through nodeStore.
// TestSerializeDeserializeInternalNode tests grouped serialization and
// deserialization of InternalNode through nodeStore at groupDepth=1.
func TestSerializeDeserializeInternalNode(t *testing.T) {
leftHash := common.HexToHash("0x1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef")
rightHash := common.HexToHash("0xfedcba0987654321fedcba0987654321fedcba0987654321fedcba0987654321")
@ -39,24 +39,32 @@ func TestSerializeDeserializeInternalNode(t *testing.T) {
rootNode.right = rightRef
s.root = rootRef
// Serialize the node — flat 65-byte format
serialized := s.serializeNode(rootRef)
// Serialize the node — grouped format at groupDepth=1:
// [type(1)][groupDepth(1)][bitmap(1)][leftHash(32)][rightHash(32)] = 67 bytes
serialized := s.serializeNode(rootRef, 1)
// Check the serialized format: [type(1)][leftHash(32)][rightHash(32)]
if serialized[0] != nodeTypeInternal {
t.Errorf("Expected type byte to be %d, got %d", nodeTypeInternal, serialized[0])
}
if serialized[1] != 1 {
t.Errorf("Expected groupDepth byte to be 1, got %d", serialized[1])
}
expectedLen := NodeTypeBytes + 2*HashSize // 1 + 64 = 65
expectedLen := NodeTypeBytes + 1 + 1 + 2*HashSize // type + groupDepth + bitmap + 2 hashes = 67
if len(serialized) != expectedLen {
t.Errorf("Expected serialized length to be %d, got %d", expectedLen, len(serialized))
}
// Check that left and right hashes are embedded directly
if !bytes.Equal(serialized[NodeTypeBytes:NodeTypeBytes+HashSize], leftHash[:]) {
// Both children present at a 1-level group → bitmap byte = 0b11000000.
if serialized[2] != 0xc0 {
t.Errorf("Expected bitmap byte 0xc0, got 0x%02x", serialized[2])
}
hashesStart := NodeTypeBytes + 1 + 1
if !bytes.Equal(serialized[hashesStart:hashesStart+HashSize], leftHash[:]) {
t.Error("Left hash not found at expected position")
}
if !bytes.Equal(serialized[NodeTypeBytes+HashSize:], rightHash[:]) {
if !bytes.Equal(serialized[hashesStart+HashSize:], rightHash[:]) {
t.Error("Right hash not found at expected position")
}
@ -116,7 +124,7 @@ func TestSerializeDeserializeStemNode(t *testing.T) {
}
// Serialize the node
serialized := s.serializeNode(ref)
serialized := s.serializeNode(ref, 8)
// Check the serialized format
if serialized[0] != nodeTypeStem {
@ -195,8 +203,9 @@ func TestDeserializeInvalidType(t *testing.T) {
// TestDeserializeInvalidLength tests deserialization with invalid data length.
func TestDeserializeInvalidLength(t *testing.T) {
s := newNodeStore()
// InternalNode with valid type byte but wrong length (needs exactly 65 bytes)
invalidData := []byte{nodeTypeInternal, 0, 0, 0}
// InternalNode group header with groupDepth=1 (valid) and a 1-byte bitmap
// announcing two present hashes, but the hash payload is missing.
invalidData := []byte{nodeTypeInternal, 1, 0xc0}
_, err := s.deserializeNode(invalidData, 0)
if err == nil {
@ -208,6 +217,21 @@ func TestDeserializeInvalidLength(t *testing.T) {
}
}
// TestDeserializeInvalidGroupDepth tests deserialization when the group depth
// byte is out of the supported 1..MaxGroupDepth range.
func TestDeserializeInvalidGroupDepth(t *testing.T) {
s := newNodeStore()
invalidData := []byte{nodeTypeInternal, 0, 0, 0}
_, err := s.deserializeNode(invalidData, 0)
if err == nil {
t.Fatal("Expected error for invalid group depth, got nil")
}
if err.Error() != "invalid group depth" {
t.Errorf("Expected 'invalid group depth' error, got: %v", err)
}
}
// TestKeyToPath tests the keyToPath function.
func TestKeyToPath(t *testing.T) {
tests := []struct {

View file

@ -95,7 +95,7 @@ func TestHashedNodeInsertValuesAtStem(t *testing.T) {
sn.setValue(byte(i), v)
}
}
serialized := rs.serializeNode(ref)
serialized := rs.serializeNode(ref, 8)
validResolver := func(path []byte, hash common.Hash) ([]byte, error) {
return serialized, nil

View file

@ -90,7 +90,7 @@ func TestInternalNodeGetWithResolver(t *testing.T) {
ref := rs.newStemRef(stem, 1)
sn := rs.getStem(ref.Index())
sn.setValue(5, common.HexToHash("0xabcd").Bytes())
return rs.serializeNode(ref), nil
return rs.serializeNode(ref, 8), nil
}
return nil, errors.New("node not found")
}
@ -290,10 +290,7 @@ func TestInternalNodeCollectNodes(t *testing.T) {
collectedPaths = append(collectedPaths, pathCopy)
}
err := s.collectNodes(s.root, []byte{1}, flushFn)
if err != nil {
t.Fatalf("Failed to collect nodes: %v", err)
}
s.collectNodes(s.root, []byte{1}, flushFn, 8)
// Should have collected 3 nodes: left stem, right stem, and the internal node itself
if len(collectedPaths) != 3 {

View file

@ -205,7 +205,7 @@ func (it *binaryNodeIterator) Path() []byte {
}
func (it *binaryNodeIterator) NodeBlob() []byte {
return it.store.serializeNode(it.current)
return it.store.serializeNode(it.current, it.trie.groupDepth)
}
// Leaf reports whether the iterator is currently positioned at a leaf value.

View file

@ -320,10 +320,7 @@ func TestStemNodeCollectNodes(t *testing.T) {
collectedPaths = append(collectedPaths, pathCopy)
}
err := s.collectNodes(s.root, []byte{0, 1, 0}, flushFn)
if err != nil {
t.Fatalf("Failed to collect nodes: %v", err)
}
s.collectNodes(s.root, []byte{0, 1, 0}, flushFn, 8)
// Should have collected one node (itself)
if len(collectedPaths) != 1 {

View file

@ -107,18 +107,83 @@ func (s *nodeStore) hashInternal(idx uint32) common.Hash {
return node.hash
}
// SerializeNode serializes a node into the flat on-disk format.
func (s *nodeStore) serializeNode(ref nodeRef) []byte {
// serializeSubtree recursively collects child hashes from a subtree of InternalNodes.
// It traverses up to `remainingDepth` levels, storing hashes of bottom-layer children.
// position tracks the current index (0 to 2^groupDepth - 1) for bitmap placement.
// hashes collects the hashes of present children, bitmap tracks which positions are present.
func (s *nodeStore) serializeSubtree(ref nodeRef, remainingDepth int, position int, absoluteDepth int, bitmap []byte, hashes *[]common.Hash) {
if remainingDepth == 0 {
// Bottom layer: store hash if not empty
switch ref.Kind() {
case kindEmpty:
// Leave bitmap bit unset, don't add hash
return
default:
// StemNode, HashedNode, or InternalNode at boundary: store hash
bitmap[position/8] |= 1 << (7 - (position % 8))
*hashes = append(*hashes, s.computeHash(ref))
}
return
}
switch ref.Kind() {
case kindInternal:
leftPos := position * 2
rightPos := position*2 + 1
s.serializeSubtree(s.getInternal(ref.Index()).left, remainingDepth-1, leftPos, absoluteDepth+1, bitmap, hashes)
s.serializeSubtree(s.getInternal(ref.Index()).right, remainingDepth-1, rightPos, absoluteDepth+1, bitmap, hashes)
case kindEmpty:
return
default:
// StemNode or HashedNode encountered before reaching the group's bottom
// layer. Compute the leaf bitmap position where this node's hash will
// be stored.
leafPos := position
switch ref.Kind() {
case kindStem:
sn := s.getStem(ref.Index())
// Extend position using the stem's key bits so that
// GetValuesAtStem traversal (which follows key bits) finds the hash.
for d := 0; d < remainingDepth; d++ {
bit := sn.Stem[(absoluteDepth+d)/8] >> (7 - ((absoluteDepth + d) % 8)) & 1
leafPos = leafPos*2 + int(bit)
}
default:
// HashedNode or unknown: extend all-left (no key bits available).
// This matches the all-zero path that resolveNode would follow.
leafPos = position << remainingDepth
}
bitmap[leafPos/8] |= 1 << (7 - (leafPos % 8))
*hashes = append(*hashes, s.computeHash(ref))
}
}
// SerializeNode serializes a node into the flat on-disk format.
func (s *nodeStore) serializeNode(ref nodeRef, groupDepth int) []byte {
switch ref.Kind() {
case kindInternal:
// InternalNode group: 1 byte type + 1 byte group depth + variable bitmap + N×32 byte hashes
bitmapSize := bitmapSizeForDepth(groupDepth)
bitmap := make([]byte, bitmapSize)
var hashes []common.Hash
node := s.getInternal(ref.Index())
var serialized [NodeTypeBytes + HashSize + HashSize]byte
s.serializeSubtree(ref, groupDepth, 0, int(node.depth), bitmap, &hashes)
// Build serialized output
serializedLen := NodeTypeBytes + 1 + bitmapSize + len(hashes)*HashSize
serialized := make([]byte, serializedLen)
serialized[0] = nodeTypeInternal
lh := s.computeHash(node.left)
rh := s.computeHash(node.right)
copy(serialized[NodeTypeBytes:NodeTypeBytes+HashSize], lh[:])
copy(serialized[NodeTypeBytes+HashSize:], rh[:])
return serialized[:]
serialized[1] = byte(groupDepth) // group depth => bitmap size for a sparse group
copy(serialized[2:2+bitmapSize], bitmap)
offset := NodeTypeBytes + 1 + bitmapSize
for _, h := range hashes {
copy(serialized[offset:offset+HashSize], h.Bytes())
offset += HashSize
}
return serialized
case kindStem:
sn := s.getStem(ref.Index())
@ -163,6 +228,59 @@ func (s *nodeStore) deserializeNodeWithHash(serialized []byte, depth int, hn com
return s.decodeNode(serialized, depth, hn, false, false)
}
// deserializeSubtree reconstructs an InternalNode subtree from grouped serialization.
// remainingDepth is how many more levels to build, position is current index in the bitmap,
// nodeDepth is the actual trie depth for the node being created.
// hashIdx tracks the current position in the hash data (incremented as hashes are consumed).
func (s *nodeStore) deserializeSubtree(hn common.Hash, remainingDepth int, position int, nodeDepth int, bitmap []byte, hashData []byte, hashIdx *int, mustRecompute bool, dirty bool) (nodeRef, error) {
if remainingDepth == 0 {
// Bottom layer: check bitmap and return HashedNode or Empty
if bitmap[position/8]>>(7-(position%8))&1 == 1 {
if len(hashData) < (*hashIdx+1)*HashSize {
return emptyRef, errInvalidSerializedLength
}
hash := common.BytesToHash(hashData[*hashIdx*HashSize : (*hashIdx+1)*HashSize])
*hashIdx++
return s.newHashedRef(hash), nil
}
return emptyRef, nil
}
// Check if this entire subtree is empty by examining all relevant bitmap bits
leftPos := position * 2
rightPos := position*2 + 1
// note that the parent might not need root computations, but the children
// do, because their hash isn't saved. Hence `mustRecompute` is set to `true`.
left, err := s.deserializeSubtree(common.Hash{}, remainingDepth-1, leftPos, nodeDepth+1, bitmap, hashData, hashIdx, true, dirty)
if err != nil {
return emptyRef, err
}
right, err := s.deserializeSubtree(common.Hash{}, remainingDepth-1, rightPos, nodeDepth+1, bitmap, hashData, hashIdx, true, dirty)
if err != nil {
return emptyRef, err
}
// If both children are empty, return Empty
if left.IsEmpty() && right.IsEmpty() {
return emptyRef, nil
}
ref := s.newInternalRef(nodeDepth)
node := s.getInternal(ref.Index())
node.left = left
node.right = right
node.mustRecompute = mustRecompute
if !mustRecompute {
// mustRecompute will only be false for the root of the subtree,
// for which we already know the hash.
node.hash = hn
node.mustRecompute = false
}
node.dirty = dirty
return ref, nil
}
func (s *nodeStore) decodeNode(serialized []byte, depth int, hn common.Hash, mustRecompute, dirty bool) (nodeRef, error) {
if len(serialized) == 0 {
return emptyRef, nil
@ -170,31 +288,23 @@ func (s *nodeStore) decodeNode(serialized []byte, depth int, hn common.Hash, mus
switch serialized[0] {
case nodeTypeInternal:
if len(serialized) != NodeTypeBytes+2*HashSize {
// Grouped format: 1 byte type + 1 byte group depth + variable bitmap + N×32 byte hashes
if len(serialized) < NodeTypeBytes+1 {
return emptyRef, errInvalidSerializedLength
}
var leftHash, rightHash common.Hash
copy(leftHash[:], serialized[NodeTypeBytes:NodeTypeBytes+HashSize])
copy(rightHash[:], serialized[NodeTypeBytes+HashSize:])
groupDepth := int(serialized[1])
if groupDepth < 1 || groupDepth > MaxGroupDepth {
return 0, errors.New("invalid group depth")
}
bitmapSize := bitmapSizeForDepth(groupDepth)
if len(serialized) < NodeTypeBytes+1+bitmapSize {
return 0, errInvalidSerializedLength
}
bitmap := serialized[2 : 2+bitmapSize]
hashData := serialized[2+bitmapSize:]
var leftRef, rightRef nodeRef
if leftHash != (common.Hash{}) {
leftRef = s.newHashedRef(leftHash)
}
if rightHash != (common.Hash{}) {
rightRef = s.newHashedRef(rightHash)
}
ref := s.newInternalRef(depth)
node := s.getInternal(ref.Index())
node.left = leftRef
node.right = rightRef
if !mustRecompute {
node.hash = hn
node.mustRecompute = false
}
node.dirty = dirty
return ref, nil
hashIdx := 0
return s.deserializeSubtree(hn, groupDepth, 0, depth, bitmap, hashData, &hashIdx, mustRecompute, dirty)
case nodeTypeStem:
if len(serialized) < NodeTypeBytes+StemSize+StemBitmapSize {
@ -230,45 +340,112 @@ func (s *nodeStore) decodeNode(serialized []byte, depth int, hn common.Hash, mus
// CollectNodes flushes every node that needs flushing via flushfn in post-order.
// Invariant: any ancestor of a node that needs flushing is itself marked, so a
// clean root means the whole subtree is clean.
func (s *nodeStore) collectNodes(ref nodeRef, path []byte, flushfn nodeFlushFn) error {
func (s *nodeStore) collectNodes(ref nodeRef, path []byte, flushfn nodeFlushFn, groupDepth int) {
switch ref.Kind() {
case kindEmpty:
return nil
case kindInternal:
node := s.getInternal(ref.Index())
if !node.dirty {
return nil
return
}
// Reuse path buffer across children: flushfn consumers
// (NodeSet.AddNode, tracer.Get) clone via string(path), so in-place
// mutation is safe.
path = append(path, 0)
if err := s.collectNodes(node.left, path, flushfn); err != nil {
return err
// Only flush at group boundaries (depth % groupDepth == 0)
if int(node.depth)%groupDepth == 0 {
// We're at a group boundary - first collect any nodes in deeper groups,
// then flush this group
s.collectChildGroups(node, path, flushfn, groupDepth, groupDepth-1)
flushfn(path, s.computeHash(ref), s.serializeNode(ref, groupDepth))
node.dirty = false
return
}
path[len(path)-1] = 1
if err := s.collectNodes(node.right, path, flushfn); err != nil {
return err
}
path = path[:len(path)-1]
flushfn(path, s.computeHash(ref), s.serializeNode(ref))
node.dirty = false
return nil
// Not at a group boundary - this shouldn't happen if we're called correctly from root
// but handle it by continuing to traverse
s.collectChildGroups(node, path, flushfn, groupDepth, groupDepth-(int(node.depth)%groupDepth)-1)
case kindStem:
sn := s.getStem(ref.Index())
if !sn.dirty {
return nil
return
}
flushfn(path, s.computeHash(ref), s.serializeNode(ref))
flushfn(path, s.computeHash(ref), s.serializeNode(ref, groupDepth))
sn.dirty = false
return nil
case kindHashed:
return nil // Already committed
case kindHashed, kindEmpty:
default:
return fmt.Errorf("CollectNodes: unexpected kind %d", ref.Kind())
panic(fmt.Sprintf("CollectNodes: unexpected kind %d", ref.Kind()))
}
}
// collectChildGroups traverses within a group to find and collect nodes in the next group.
// remainingLevels is how many more levels below the current node until we reach the group boundary.
// When remainingLevels=0, the current node's children are at the next group boundary.
func (s *nodeStore) collectChildGroups(node *InternalNode, path []byte, flushfn nodeFlushFn, groupDepth int, remainingLevels int) error {
if remainingLevels == 0 {
// Current node is at depth (groupBoundary - 1), its children are at the next group boundary
if !node.left.IsEmpty() {
s.collectNodes(node.left, appendBit(path, 0), flushfn, groupDepth)
}
if !node.right.IsEmpty() {
s.collectNodes(node.right, appendBit(path, 1), flushfn, groupDepth)
}
return nil
}
if !node.left.IsEmpty() {
switch node.left.Kind() {
case kindInternal:
n := s.getInternal(node.left.Index())
if err := s.collectChildGroups(n, appendBit(path, 0), flushfn, groupDepth, remainingLevels-1); err != nil {
return err
}
default:
extPath := s.extendPathToGroupLeaf(appendBit(path, 0), node.left, remainingLevels)
s.collectNodes(node.left, extPath, flushfn, groupDepth)
}
}
if !node.right.IsEmpty() {
switch node.right.Kind() {
case kindInternal:
n := s.getInternal(node.right.Index())
if err := s.collectChildGroups(n, appendBit(path, 1), flushfn, groupDepth, remainingLevels-1); err != nil {
return err
}
default:
extPath := s.extendPathToGroupLeaf(appendBit(path, 1), node.right, remainingLevels)
s.collectNodes(node.right, extPath, flushfn, groupDepth)
}
}
return nil
}
// extendPathToGroupLeaf extends a storage path to the group's leaf boundary,
// matching the projection done by serializeSubtree. For StemNodes, the path
// is extended using the stem's key bits (same as serializeSubtree). For other
// node types, the path is extended with all-zero (left) bits.
func (s *nodeStore) extendPathToGroupLeaf(path []byte, node nodeRef, remainingLevels int) []byte {
if remainingLevels <= 0 {
return path
}
if node.Kind() == kindStem {
sn := s.getStem(node.Index())
for _ = range remainingLevels {
bit := sn.Stem[len(path)/8] >> (7 - (len(path) % 8)) & 1
path = appendBit(path, bit)
}
} else {
// HashedNode or other: all-left extension (matches serializeSubtree's
// position << remainingDepth behavior).
for _ = range remainingLevels {
path = appendBit(path, 0)
}
}
return path
}
// appendBit appends a bit to a path, returning a new slice
func appendBit(path []byte, bit byte) []byte {
var p [256]byte
copy(p[:], path)
result := p[:len(path)]
return append(result, bit)
}
func (s *nodeStore) toDot(ref nodeRef, parent, path string) string {
switch ref.Kind() {
case kindInternal:

View file

@ -107,9 +107,14 @@ func ChunkifyCode(code []byte) ChunkedCode {
// BinaryTrie is the implementation of https://eips.ethereum.org/EIPS/eip-7864.
type BinaryTrie struct {
store *nodeStore
reader *trie.Reader
tracer *trie.PrevalueTracer
store *nodeStore
reader *trie.Reader
tracer *trie.PrevalueTracer
groupDepth int // Number of levels per serialized group (1-8, default 8)
}
func (t *BinaryTrie) GroupDepth() int {
return t.groupDepth
}
// ToDot converts the binary trie to a DOT language representation. Useful for debugging.
@ -119,15 +124,20 @@ func (t *BinaryTrie) ToDot() string {
}
// NewBinaryTrie creates a new binary trie.
func NewBinaryTrie(root common.Hash, db database.NodeDatabase) (*BinaryTrie, error) {
// groupDepth specifies the number of levels per serialized group (1-8).
func NewBinaryTrie(root common.Hash, db database.NodeDatabase, groupDepth int) (*BinaryTrie, error) {
if groupDepth < 1 || groupDepth > MaxGroupDepth {
panic("invalid group depth size")
}
reader, err := trie.NewReader(root, common.Hash{}, db)
if err != nil {
return nil, err
}
t := &BinaryTrie{
store: newNodeStore(),
reader: reader,
tracer: trie.NewPrevalueTracer(),
store: newNodeStore(),
reader: reader,
tracer: trie.NewPrevalueTracer(),
groupDepth: groupDepth,
}
// Parse the root node if it's not empty
if root != types.EmptyBinaryHash && root != types.EmptyRootHash {
@ -312,12 +322,9 @@ func (t *BinaryTrie) Commit(_ bool) (common.Hash, *trienode.NodeSet) {
// Pre-size the path buffer: collectNodes reuses it in-place via
// append/truncate; 32 covers typical binary-trie depth without regrowth.
pathBuf := make([]byte, 0, 32)
err := t.store.collectNodes(t.store.root, pathBuf, func(path []byte, hash common.Hash, serialized []byte) {
t.store.collectNodes(t.store.root, pathBuf, func(path []byte, hash common.Hash, serialized []byte) {
nodeset.AddNode(path, trienode.NewNodeWithPrev(hash, serialized, t.tracer.Get(path)))
})
if err != nil {
panic(fmt.Errorf("CollectNodes failed: %v", err))
}
}, t.groupDepth)
return t.Hash(), nodeset
}
@ -341,9 +348,10 @@ func (t *BinaryTrie) Prove(key []byte, proofDb ethdb.KeyValueWriter) error {
// Copy creates a deep copy of the trie.
func (t *BinaryTrie) Copy() *BinaryTrie {
return &BinaryTrie{
store: t.store.Copy(),
reader: t.reader,
tracer: t.tracer.Copy(),
store: t.store.Copy(),
reader: t.reader,
tracer: t.tracer.Copy(),
groupDepth: t.groupDepth,
}
}

View file

@ -768,8 +768,9 @@ func TestGetStorageNonMembershipInternalRoot(t *testing.T) {
// flushes only the root-to-leaf path.
func TestCommitSkipCleanSubtrees(t *testing.T) {
tr := &BinaryTrie{
store: newNodeStore(),
tracer: trie.NewPrevalueTracer(),
store: newNodeStore(),
tracer: trie.NewPrevalueTracer(),
groupDepth: 1,
}
const n = 200
key := func(i int) [HashSize]byte {

View file

@ -31,12 +31,15 @@ import (
// Config defines all necessary options for database.
type Config struct {
Preimages bool // Flag whether the preimage of node key is recorded
IsUBT bool // Flag whether the db is holding a verkle tree
HashDB *hashdb.Config // Configs for hash-based scheme
PathDB *pathdb.Config // Configs for experimental path-based scheme
Preimages bool // Flag whether the preimage of node key is recorded
IsUBT bool // Flag whether the db is holding a unified binary tree
BinTrieGroupDepth int // Number of levels per serialized group in binary trie (1-8, default 8)
HashDB *hashdb.Config // Configs for hash-based scheme
PathDB *pathdb.Config // Configs for experimental path-based scheme
}
const DefaultBinTrieGroupDepth = 5
// HashDefaults represents a config for using hash-based scheme with
// default settings.
var HashDefaults = &Config{
@ -45,12 +48,13 @@ var HashDefaults = &Config{
HashDB: hashdb.Defaults,
}
// UBTDefaults represents a config for holding verkle trie data
// UBTDefaults represents a config for holding unified binary trie data
// using path-based scheme with default settings.
var UBTDefaults = &Config{
Preimages: false,
IsUBT: true,
PathDB: pathdb.Defaults,
Preimages: false,
IsUBT: true,
BinTrieGroupDepth: DefaultBinTrieGroupDepth,
PathDB: pathdb.Defaults,
}
// backend defines the methods needed to access/update trie nodes in different
@ -393,3 +397,7 @@ func (db *Database) SnapshotCompleted() bool {
}
return pdb.SnapshotCompleted()
}
func (db *Database) BinTrieGroupDepth() int {
return db.config.BinTrieGroupDepth
}

View file

@ -161,7 +161,19 @@ func loadGenerator(db ethdb.KeyValueReader, hash nodeHasher) (*journalGenerator,
// loadLayers loads a pre-existing state layer backed by a key-value store.
func (db *Database) loadLayers() layer {
// Retrieve the root node of persistent state.
root, err := db.hasher(rawdb.ReadAccountTrieNode(db.diskdb, nil))
var (
root common.Hash
err error
)
if db.isUBT {
root = rawdb.ReadSnapshotRoot(db.diskdb)
if root == (common.Hash{}) {
root = types.EmptyBinaryHash
}
} else {
blob := rawdb.ReadAccountTrieNode(db.diskdb, nil)
root, err = db.hasher(blob)
}
if err != nil {
log.Crit("Failed to compute node hash", "err", err)
}