diff --git a/cmd/evm/internal/t8ntool/transition.go b/cmd/evm/internal/t8ntool/transition.go index e0bb3a449d..89b703d3b8 100644 --- a/cmd/evm/internal/t8ntool/transition.go +++ b/cmd/evm/internal/t8ntool/transition.go @@ -546,7 +546,7 @@ func BinKeys(ctx *cli.Context) error { db := triedb.NewDatabase(rawdb.NewMemoryDatabase(), triedb.UBTDefaults) defer db.Close() - bt, err := genBinTrieFromAlloc(alloc, db) + bt, err := genBinTrieFromAlloc(alloc, db, triedb.UBTDefaults.BinTrieGroupDepth) if err != nil { return fmt.Errorf("error generating bt: %w", err) } @@ -590,7 +590,7 @@ func BinTrieRoot(ctx *cli.Context) error { db := triedb.NewDatabase(rawdb.NewMemoryDatabase(), triedb.UBTDefaults) defer db.Close() - bt, err := genBinTrieFromAlloc(alloc, db) + bt, err := genBinTrieFromAlloc(alloc, db, triedb.UBTDefaults.BinTrieGroupDepth) if err != nil { return fmt.Errorf("error generating bt: %w", err) } @@ -600,8 +600,8 @@ func BinTrieRoot(ctx *cli.Context) error { } // TODO(@CPerezz): Should this go to `bintrie` module? 
-func genBinTrieFromAlloc(alloc core.GenesisAlloc, db database.NodeDatabase) (*bintrie.BinaryTrie, error) { - bt, err := bintrie.NewBinaryTrie(types.EmptyBinaryHash, db) +func genBinTrieFromAlloc(alloc core.GenesisAlloc, db database.NodeDatabase, groupDepth int) (*bintrie.BinaryTrie, error) { + bt, err := bintrie.NewBinaryTrie(types.EmptyBinaryHash, db, groupDepth) if err != nil { return nil, err } diff --git a/cmd/geth/bintrie_convert.go b/cmd/geth/bintrie_convert.go index 43d2e629ac..46cb3aa7e4 100644 --- a/cmd/geth/bintrie_convert.go +++ b/cmd/geth/bintrie_convert.go @@ -151,7 +151,7 @@ func convertToBinaryTrie(ctx *cli.Context) error { }) defer destTriedb.Close() - binTrie, err := bintrie.NewBinaryTrie(types.EmptyBinaryHash, destTriedb) + binTrie, err := bintrie.NewBinaryTrie(types.EmptyBinaryHash, destTriedb, ctx.Int(utils.BinTrieGroupDepthFlag.Name)) if err != nil { return fmt.Errorf("failed to create binary trie: %w", err) } @@ -319,7 +319,7 @@ func commitBinaryTrie(bt *bintrie.BinaryTrie, currentRoot common.Hash, destDB *t runtime.GC() debug.FreeOSMemory() - bt, err := bintrie.NewBinaryTrie(newRoot, destDB) + bt, err := bintrie.NewBinaryTrie(newRoot, destDB, bt.GroupDepth()) if err != nil { return nil, common.Hash{}, fmt.Errorf("failed to reload binary trie: %w", err) } diff --git a/cmd/geth/bintrie_convert_test.go b/cmd/geth/bintrie_convert_test.go index 50ae752358..32e8c7e55b 100644 --- a/cmd/geth/bintrie_convert_test.go +++ b/cmd/geth/bintrie_convert_test.go @@ -87,7 +87,7 @@ func TestBintrieConvert(t *testing.T) { }) defer destTriedb.Close() - bt, err := bintrie.NewBinaryTrie(types.EmptyBinaryHash, destTriedb) + bt, err := bintrie.NewBinaryTrie(types.EmptyBinaryHash, destTriedb, 8) if err != nil { t.Fatalf("failed to create binary trie: %v", err) } @@ -98,7 +98,7 @@ func TestBintrieConvert(t *testing.T) { } t.Logf("Binary trie root: %x", currentRoot) - bt2, err := bintrie.NewBinaryTrie(currentRoot, destTriedb) + bt2, err := 
bintrie.NewBinaryTrie(currentRoot, destTriedb, 8) if err != nil { t.Fatalf("failed to reload binary trie: %v", err) } @@ -194,7 +194,7 @@ func TestBintrieConvertDeleteSource(t *testing.T) { PathDB: pathdb.Defaults, }) - bt, err := bintrie.NewBinaryTrie(types.EmptyBinaryHash, destTriedb) + bt, err := bintrie.NewBinaryTrie(types.EmptyBinaryHash, destTriedb, 8) if err != nil { t.Fatalf("failed to create binary trie: %v", err) } @@ -209,7 +209,7 @@ func TestBintrieConvertDeleteSource(t *testing.T) { } srcTriedb2.Close() - bt2, err := bintrie.NewBinaryTrie(newRoot, destTriedb) + bt2, err := bintrie.NewBinaryTrie(newRoot, destTriedb, 8) if err != nil { t.Fatalf("failed to reload binary trie after deletion: %v", err) } diff --git a/cmd/geth/main.go b/cmd/geth/main.go index ae869ec970..c8d7abc65b 100644 --- a/cmd/geth/main.go +++ b/cmd/geth/main.go @@ -95,6 +95,7 @@ var ( utils.StateHistoryFlag, utils.TrienodeHistoryFlag, utils.TrienodeHistoryFullValueCheckpointFlag, + utils.BinTrieGroupDepthFlag, utils.LightKDFFlag, utils.EthRequiredBlocksFlag, utils.LegacyWhitelistFlag, // deprecated diff --git a/cmd/utils/flags.go b/cmd/utils/flags.go index 9d996f15cb..cc4c3bff5c 100644 --- a/cmd/utils/flags.go +++ b/cmd/utils/flags.go @@ -297,6 +297,12 @@ var ( Value: ethconfig.Defaults.EnableStateSizeTracking, Category: flags.StateCategory, } + BinTrieGroupDepthFlag = &cli.IntFlag{ + Name: "bintrie.groupdepth", + Usage: "Number of levels per serialized group in binary trie (1-8, default 5). 
Lower values create smaller groups with more nodes.", + Value: 5, + Category: flags.StateCategory, + } StateHistoryFlag = &cli.Uint64Flag{ Name: "history.state", Usage: "Number of recent blocks to retain state history for, only relevant in state.scheme=path (default = 90,000 blocks, 0 = entire chain)", @@ -1817,6 +1823,9 @@ func SetEthConfig(ctx *cli.Context, stack *node.Node, cfg *ethconfig.Config) { if ctx.IsSet(TrienodeHistoryFullValueCheckpointFlag.Name) { cfg.NodeFullValueCheckpoint = uint32(ctx.Uint(TrienodeHistoryFullValueCheckpointFlag.Name)) } + if ctx.IsSet(BinTrieGroupDepthFlag.Name) { + cfg.BinTrieGroupDepth = ctx.Int(BinTrieGroupDepthFlag.Name) + } if ctx.IsSet(StateSchemeFlag.Name) { cfg.StateScheme = ctx.String(StateSchemeFlag.Name) } @@ -2433,6 +2442,7 @@ func MakeChain(ctx *cli.Context, stack *node.Node, readonly bool) (*core.BlockCh StateHistory: ctx.Uint64(StateHistoryFlag.Name), TrienodeHistory: ctx.Int64(TrienodeHistoryFlag.Name), NodeFullValueCheckpoint: uint32(ctx.Uint(TrienodeHistoryFullValueCheckpointFlag.Name)), + BinTrieGroupDepth: ctx.Int(BinTrieGroupDepthFlag.Name), // Disable transaction indexing/unindexing. 
TxLookupLimit: -1, diff --git a/core/bintrie_witness_test.go b/core/bintrie_witness_test.go index 1b033151d3..66feef0675 100644 --- a/core/bintrie_witness_test.go +++ b/core/bintrie_witness_test.go @@ -92,6 +92,7 @@ func TestProcessUBT(t *testing.T) { // genesis := gspec.MustCommit(bcdb, triedb) options := DefaultConfig().WithStateScheme(rawdb.PathScheme) options.SnapshotLimit = 0 + options.BinTrieGroupDepth = triedb.DefaultBinTrieGroupDepth blockchain, _ := NewBlockChain(bcdb, gspec, beacon.New(ethash.NewFaker()), options) defer blockchain.Stop() @@ -218,6 +219,7 @@ func TestProcessParentBlockHash(t *testing.T) { t.Run("UBT", func(t *testing.T) { db := rawdb.NewMemoryDatabase() cacheConfig := DefaultConfig().WithStateScheme(rawdb.PathScheme) + cacheConfig.BinTrieGroupDepth = triedb.DefaultBinTrieGroupDepth cacheConfig.SnapshotLimit = 0 triedb := triedb.NewDatabase(db, cacheConfig.triedbConfig(true)) statedb, _ := state.New(types.EmptyBinaryHash, state.NewDatabase(triedb, nil)) diff --git a/core/blockchain.go b/core/blockchain.go index 296ef6bc16..f21a1462ea 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -170,9 +170,10 @@ type BlockChainConfig struct { TrieNoAsyncFlush bool // Whether the asynchronous buffer flushing is disallowed TrieJournalDirectory string // Directory path to the journal used for persisting trie data across node restarts - Preimages bool // Whether to store preimage of trie key to the disk - StateScheme string // Scheme used to store ethereum states and merkle tree nodes on top - ArchiveMode bool // Whether to enable the archive mode + Preimages bool // Whether to store preimage of trie key to the disk + StateScheme string // Scheme used to store ethereum states and merkle tree nodes on top + ArchiveMode bool // Whether to enable the archive mode + BinTrieGroupDepth int // Number of levels per serialized group in binary trie (1-8) // Number of blocks from the chain head for which state histories are retained. 
// If set to 0, all state histories across the entire chain will be retained; @@ -260,8 +261,9 @@ func (cfg BlockChainConfig) WithNoAsyncFlush(on bool) *BlockChainConfig { // triedbConfig derives the configures for trie database. func (cfg *BlockChainConfig) triedbConfig(isUBT bool) *triedb.Config { config := &triedb.Config{ - Preimages: cfg.Preimages, - IsUBT: isUBT, + Preimages: cfg.Preimages, + IsUBT: isUBT, + BinTrieGroupDepth: cfg.BinTrieGroupDepth, } if cfg.StateScheme == rawdb.HashScheme { config.HashDB = &hashdb.Config{ diff --git a/core/genesis.go b/core/genesis.go index d77ea10d8c..6a0affa52e 100644 --- a/core/genesis.go +++ b/core/genesis.go @@ -136,8 +136,9 @@ func hashAlloc(ga *types.GenesisAlloc, isUBT bool) (common.Hash, error) { var config *triedb.Config if isUBT { config = &triedb.Config{ - PathDB: pathdb.Defaults, - IsUBT: true, + PathDB: pathdb.Defaults, + IsUBT: true, + BinTrieGroupDepth: triedb.UBTDefaults.BinTrieGroupDepth, } } // Create an ephemeral in-memory database for computing hash, diff --git a/core/genesis_test.go b/core/genesis_test.go index e15ad00222..94f1b3a4fd 100644 --- a/core/genesis_test.go +++ b/core/genesis_test.go @@ -261,9 +261,9 @@ func newDbConfig(scheme string) *triedb.Config { return &triedb.Config{PathDB: &config} } -func TestVerkleGenesisCommit(t *testing.T) { - var verkleTime uint64 = 0 - verkleConfig := ¶ms.ChainConfig{ +func TestBinaryGenesisCommit(t *testing.T) { + var ubtTime uint64 = 0 + ubtConfig := ¶ms.ChainConfig{ ChainID: big.NewInt(1), HomesteadBlock: big.NewInt(0), DAOForkBlock: nil, @@ -281,11 +281,11 @@ func TestVerkleGenesisCommit(t *testing.T) { ArrowGlacierBlock: big.NewInt(0), GrayGlacierBlock: big.NewInt(0), MergeNetsplitBlock: nil, - ShanghaiTime: &verkleTime, - CancunTime: &verkleTime, - PragueTime: &verkleTime, - OsakaTime: &verkleTime, - UBTTime: &verkleTime, + ShanghaiTime: &ubtTime, + CancunTime: &ubtTime, + PragueTime: &ubtTime, + OsakaTime: &ubtTime, + UBTTime: &ubtTime, 
TerminalTotalDifficulty: big.NewInt(0), EnableUBTAtGenesis: true, Ethash: nil, @@ -300,8 +300,8 @@ func TestVerkleGenesisCommit(t *testing.T) { genesis := &Genesis{ BaseFee: big.NewInt(params.InitialBaseFee), - Config: verkleConfig, - Timestamp: verkleTime, + Config: ubtConfig, + Timestamp: ubtTime, Difficulty: big.NewInt(0), Alloc: types.GenesisAlloc{ {1}: {Balance: big.NewInt(1), Storage: map[common.Hash]common.Hash{{1}: {1}}}, @@ -320,17 +320,18 @@ func TestVerkleGenesisCommit(t *testing.T) { config.NoAsyncFlush = true triedb := triedb.NewDatabase(db, &triedb.Config{ - IsUBT: true, - PathDB: &config, + IsUBT: true, + PathDB: &config, + BinTrieGroupDepth: triedb.DefaultBinTrieGroupDepth, }) block := genesis.MustCommit(db, triedb) if !bytes.Equal(block.Root().Bytes(), expected) { t.Fatalf("invalid genesis state root, expected %x, got %x", expected, block.Root()) } - // Test that the trie is verkle + // Test that the trie is a unified binary trie if !triedb.IsUBT() { - t.Fatalf("expected trie to be verkle") + t.Fatalf("expected trie to be a unified binary trie") } vdb := rawdb.NewTable(db, string(rawdb.VerklePrefix)) if !rawdb.HasAccountTrieNode(vdb, nil) { diff --git a/core/state/database_ubt.go b/core/state/database_ubt.go index 718d93df87..16579f6d6a 100644 --- a/core/state/database_ubt.go +++ b/core/state/database_ubt.go @@ -96,7 +96,7 @@ func (db *UBTDatabase) ReadersWithCacheStats(stateRoot common.Hash) (Reader, Rea // OpenTrie opens the main account trie at a specific root hash. func (db *UBTDatabase) OpenTrie(root common.Hash) (Trie, error) { - return bintrie.NewBinaryTrie(root, db.triedb) + return bintrie.NewBinaryTrie(root, db.triedb, db.triedb.BinTrieGroupDepth()) } // OpenStorageTrie opens the storage trie of an account. 
In binary trie mode, diff --git a/core/state/reader.go b/core/state/reader.go index 5df0acbb9b..be07cec0f9 100644 --- a/core/state/reader.go +++ b/core/state/reader.go @@ -255,7 +255,7 @@ type ubtTrieReader struct { // newUBTTrieReader constructs a Unified-binary-trie reader of the specific state. // An error will be returned if the associated trie specified by root is not existent. func newUBTTrieReader(root common.Hash, db *triedb.Database) (*ubtTrieReader, error) { - binTrie, binErr := bintrie.NewBinaryTrie(root, db) + binTrie, binErr := bintrie.NewBinaryTrie(root, db, db.BinTrieGroupDepth()) if binErr != nil { return nil, binErr } diff --git a/eth/backend.go b/eth/backend.go index 08a3c70c9d..6cfd1f6fa0 100644 --- a/eth/backend.go +++ b/eth/backend.go @@ -237,6 +237,7 @@ func New(stack *node.Node, config *ethconfig.Config) (*Ethereum, error) { StateHistory: config.StateHistory, TrienodeHistory: config.TrienodeHistory, NodeFullValueCheckpoint: config.NodeFullValueCheckpoint, + BinTrieGroupDepth: config.BinTrieGroupDepth, StateScheme: scheme, HistoryPolicy: histPolicy, TxLookupLimit: int64(min(config.TransactionHistory, math.MaxInt64)), diff --git a/eth/ethconfig/config.go b/eth/ethconfig/config.go index dd7436bf52..b51b78e199 100644 --- a/eth/ethconfig/config.go +++ b/eth/ethconfig/config.go @@ -35,6 +35,7 @@ import ( "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/miner" "github.com/ethereum/go-ethereum/params" + "github.com/ethereum/go-ethereum/triedb" "github.com/ethereum/go-ethereum/triedb/pathdb" ) @@ -59,6 +60,7 @@ var Defaults = Config{ StateHistory: pathdb.Defaults.StateHistory, TrienodeHistory: pathdb.Defaults.TrienodeHistory, NodeFullValueCheckpoint: pathdb.Defaults.FullValueCheckpoint, + BinTrieGroupDepth: triedb.DefaultBinTrieGroupDepth, DatabaseCache: 2048, TrieCleanCache: 614, TrieDirtyCache: 1024, @@ -125,6 +127,11 @@ type Config struct { // consistent with persistent state. 
StateScheme string `toml:",omitempty"` + // BinTrieGroupDepth is the number of levels per serialized group in binary trie. + // Valid values are 1-8 (8 yields byte-aligned groups); the default is triedb.DefaultBinTrieGroupDepth. + // Lower values create smaller groups with more nodes. + BinTrieGroupDepth int `toml:",omitempty"` + // RequiredBlocks is a set of block number -> hash mappings which must be in the // canonical chain of all remote peers. Setting the option makes geth verify the // presence of these blocks for every new peer connection. diff --git a/eth/ethconfig/gen_config.go b/eth/ethconfig/gen_config.go index ed85562f44..c5e45348be 100644 --- a/eth/ethconfig/gen_config.go +++ b/eth/ethconfig/gen_config.go @@ -34,6 +34,7 @@ func (c Config) MarshalTOML() (interface{}, error) { TrienodeHistory int64 `toml:",omitempty"` NodeFullValueCheckpoint uint32 `toml:",omitempty"` StateScheme string `toml:",omitempty"` + BinTrieGroupDepth int `toml:",omitempty"` RequiredBlocks map[uint64]common.Hash `toml:"-"` SlowBlockThreshold time.Duration `toml:",omitempty"` SkipBcVersionCheck bool `toml:"-"` @@ -87,6 +88,7 @@ func (c Config) MarshalTOML() (interface{}, error) { enc.TrienodeHistory = c.TrienodeHistory enc.NodeFullValueCheckpoint = c.NodeFullValueCheckpoint enc.StateScheme = c.StateScheme + enc.BinTrieGroupDepth = c.BinTrieGroupDepth enc.RequiredBlocks = c.RequiredBlocks enc.SlowBlockThreshold = c.SlowBlockThreshold enc.SkipBcVersionCheck = c.SkipBcVersionCheck @@ -144,6 +146,7 @@ func (c *Config) UnmarshalTOML(unmarshal func(interface{}) error) error { TrienodeHistory *int64 `toml:",omitempty"` NodeFullValueCheckpoint *uint32 `toml:",omitempty"` StateScheme *string `toml:",omitempty"` + BinTrieGroupDepth *int `toml:",omitempty"` RequiredBlocks map[uint64]common.Hash `toml:"-"` SlowBlockThreshold *time.Duration `toml:",omitempty"` SkipBcVersionCheck *bool `toml:"-"` @@ -234,6 +237,9 @@ func (c *Config) UnmarshalTOML(unmarshal func(interface{}) error) error { if dec.StateScheme != nil { 
c.StateScheme = *dec.StateScheme } + if dec.BinTrieGroupDepth != nil { + c.BinTrieGroupDepth = *dec.BinTrieGroupDepth + } if dec.RequiredBlocks != nil { c.RequiredBlocks = dec.RequiredBlocks } diff --git a/trie/bintrie/binary_node.go b/trie/bintrie/binary_node.go index e7f57d45a2..3516bf6bd5 100644 --- a/trie/bintrie/binary_node.go +++ b/trie/bintrie/binary_node.go @@ -27,8 +27,19 @@ const ( NodeTypeBytes = 1 // Size of node type prefix in serialization HashSize = 32 // Size of a hash in bytes StemBitmapSize = 32 // Size of the bitmap in a stem node (256 values = 32 bytes) + + MaxGroupDepth = 8 ) +// bitmapSizeForDepth returns the bitmap size in bytes for a given group depth. +// For depths 1-3, returns 1 byte. For depths 4-8, returns 2^(depth-3) bytes. +func bitmapSizeForDepth(groupDepth int) int { + if groupDepth <= 3 { + return 1 + } + return 1 << (groupDepth - 3) +} + const ( nodeTypeStem = iota + 1 nodeTypeInternal diff --git a/trie/bintrie/binary_node_test.go b/trie/bintrie/binary_node_test.go index 12ac199903..857060a0c0 100644 --- a/trie/bintrie/binary_node_test.go +++ b/trie/bintrie/binary_node_test.go @@ -23,8 +23,8 @@ import ( "github.com/ethereum/go-ethereum/common" ) -// TestSerializeDeserializeInternalNode tests flat 65-byte serialization and -// deserialization of InternalNode through nodeStore. +// TestSerializeDeserializeInternalNode tests grouped serialization and +// deserialization of InternalNode through nodeStore at groupDepth=1. 
func TestSerializeDeserializeInternalNode(t *testing.T) { leftHash := common.HexToHash("0x1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef") rightHash := common.HexToHash("0xfedcba0987654321fedcba0987654321fedcba0987654321fedcba0987654321") @@ -39,24 +39,32 @@ func TestSerializeDeserializeInternalNode(t *testing.T) { rootNode.right = rightRef s.root = rootRef - // Serialize the node — flat 65-byte format - serialized := s.serializeNode(rootRef) + // Serialize the node — grouped format at groupDepth=1: + // [type(1)][groupDepth(1)][bitmap(1)][leftHash(32)][rightHash(32)] = 67 bytes + serialized := s.serializeNode(rootRef, 1) - // Check the serialized format: [type(1)][leftHash(32)][rightHash(32)] if serialized[0] != nodeTypeInternal { t.Errorf("Expected type byte to be %d, got %d", nodeTypeInternal, serialized[0]) } + if serialized[1] != 1 { + t.Errorf("Expected groupDepth byte to be 1, got %d", serialized[1]) + } - expectedLen := NodeTypeBytes + 2*HashSize // 1 + 64 = 65 + expectedLen := NodeTypeBytes + 1 + 1 + 2*HashSize // type + groupDepth + bitmap + 2 hashes = 67 if len(serialized) != expectedLen { t.Errorf("Expected serialized length to be %d, got %d", expectedLen, len(serialized)) } - // Check that left and right hashes are embedded directly - if !bytes.Equal(serialized[NodeTypeBytes:NodeTypeBytes+HashSize], leftHash[:]) { + // Both children present at a 1-level group → bitmap byte = 0b11000000. 
+ if serialized[2] != 0xc0 { + t.Errorf("Expected bitmap byte 0xc0, got 0x%02x", serialized[2]) + } + + hashesStart := NodeTypeBytes + 1 + 1 + if !bytes.Equal(serialized[hashesStart:hashesStart+HashSize], leftHash[:]) { t.Error("Left hash not found at expected position") } - if !bytes.Equal(serialized[NodeTypeBytes+HashSize:], rightHash[:]) { + if !bytes.Equal(serialized[hashesStart+HashSize:], rightHash[:]) { t.Error("Right hash not found at expected position") } @@ -116,7 +124,7 @@ func TestSerializeDeserializeStemNode(t *testing.T) { } // Serialize the node - serialized := s.serializeNode(ref) + serialized := s.serializeNode(ref, 8) // Check the serialized format if serialized[0] != nodeTypeStem { @@ -195,8 +203,9 @@ func TestDeserializeInvalidType(t *testing.T) { // TestDeserializeInvalidLength tests deserialization with invalid data length. func TestDeserializeInvalidLength(t *testing.T) { s := newNodeStore() - // InternalNode with valid type byte but wrong length (needs exactly 65 bytes) - invalidData := []byte{nodeTypeInternal, 0, 0, 0} + // InternalNode group header with groupDepth=1 (valid) and a 1-byte bitmap + // announcing two present hashes, but the hash payload is missing. + invalidData := []byte{nodeTypeInternal, 1, 0xc0} _, err := s.deserializeNode(invalidData, 0) if err == nil { @@ -208,6 +217,21 @@ func TestDeserializeInvalidLength(t *testing.T) { } } +// TestDeserializeInvalidGroupDepth tests deserialization when the group depth +// byte is out of the supported 1..MaxGroupDepth range. +func TestDeserializeInvalidGroupDepth(t *testing.T) { + s := newNodeStore() + invalidData := []byte{nodeTypeInternal, 0, 0, 0} + + _, err := s.deserializeNode(invalidData, 0) + if err == nil { + t.Fatal("Expected error for invalid group depth, got nil") + } + if err.Error() != "invalid group depth" { + t.Errorf("Expected 'invalid group depth' error, got: %v", err) + } +} + // TestKeyToPath tests the keyToPath function. 
func TestKeyToPath(t *testing.T) { tests := []struct { diff --git a/trie/bintrie/hashed_node_test.go b/trie/bintrie/hashed_node_test.go index ae77b7c570..2e12bfba5e 100644 --- a/trie/bintrie/hashed_node_test.go +++ b/trie/bintrie/hashed_node_test.go @@ -95,7 +95,7 @@ func TestHashedNodeInsertValuesAtStem(t *testing.T) { sn.setValue(byte(i), v) } } - serialized := rs.serializeNode(ref) + serialized := rs.serializeNode(ref, 8) validResolver := func(path []byte, hash common.Hash) ([]byte, error) { return serialized, nil diff --git a/trie/bintrie/internal_node_test.go b/trie/bintrie/internal_node_test.go index 8d5a75de8c..4d8da8af37 100644 --- a/trie/bintrie/internal_node_test.go +++ b/trie/bintrie/internal_node_test.go @@ -90,7 +90,7 @@ func TestInternalNodeGetWithResolver(t *testing.T) { ref := rs.newStemRef(stem, 1) sn := rs.getStem(ref.Index()) sn.setValue(5, common.HexToHash("0xabcd").Bytes()) - return rs.serializeNode(ref), nil + return rs.serializeNode(ref, 8), nil } return nil, errors.New("node not found") } @@ -290,10 +290,7 @@ func TestInternalNodeCollectNodes(t *testing.T) { collectedPaths = append(collectedPaths, pathCopy) } - err := s.collectNodes(s.root, []byte{1}, flushFn) - if err != nil { - t.Fatalf("Failed to collect nodes: %v", err) - } + s.collectNodes(s.root, []byte{1}, flushFn, 8) // Should have collected 3 nodes: left stem, right stem, and the internal node itself if len(collectedPaths) != 3 { diff --git a/trie/bintrie/iterator.go b/trie/bintrie/iterator.go index 31645430c3..a920f91378 100644 --- a/trie/bintrie/iterator.go +++ b/trie/bintrie/iterator.go @@ -205,7 +205,7 @@ func (it *binaryNodeIterator) Path() []byte { } func (it *binaryNodeIterator) NodeBlob() []byte { - return it.store.serializeNode(it.current) + return it.store.serializeNode(it.current, it.trie.groupDepth) } // Leaf reports whether the iterator is currently positioned at a leaf value. 
diff --git a/trie/bintrie/stem_node_test.go b/trie/bintrie/stem_node_test.go index 5faf903fba..ae6b57ab34 100644 --- a/trie/bintrie/stem_node_test.go +++ b/trie/bintrie/stem_node_test.go @@ -320,10 +320,7 @@ func TestStemNodeCollectNodes(t *testing.T) { collectedPaths = append(collectedPaths, pathCopy) } - err := s.collectNodes(s.root, []byte{0, 1, 0}, flushFn) - if err != nil { - t.Fatalf("Failed to collect nodes: %v", err) - } + s.collectNodes(s.root, []byte{0, 1, 0}, flushFn, 8) // Should have collected one node (itself) if len(collectedPaths) != 1 { diff --git a/trie/bintrie/store_commit.go b/trie/bintrie/store_commit.go index 7101087b51..b14bffbc6c 100644 --- a/trie/bintrie/store_commit.go +++ b/trie/bintrie/store_commit.go @@ -107,18 +107,83 @@ func (s *nodeStore) hashInternal(idx uint32) common.Hash { return node.hash } -// SerializeNode serializes a node into the flat on-disk format. -func (s *nodeStore) serializeNode(ref nodeRef) []byte { +// serializeSubtree recursively collects child hashes from a subtree of InternalNodes. +// It traverses up to `remainingDepth` levels, storing hashes of bottom-layer children. +// position tracks the current index (0 to 2^groupDepth - 1) for bitmap placement. +// hashes collects the hashes of present children, bitmap tracks which positions are present. 
+func (s *nodeStore) serializeSubtree(ref nodeRef, remainingDepth int, position int, absoluteDepth int, bitmap []byte, hashes *[]common.Hash) { + if remainingDepth == 0 { + // Bottom layer: store hash if not empty + switch ref.Kind() { + case kindEmpty: + // Leave bitmap bit unset, don't add hash + return + default: + // StemNode, HashedNode, or InternalNode at boundary: store hash + bitmap[position/8] |= 1 << (7 - (position % 8)) + *hashes = append(*hashes, s.computeHash(ref)) + } + return + } + switch ref.Kind() { case kindInternal: + leftPos := position * 2 + rightPos := position*2 + 1 + s.serializeSubtree(s.getInternal(ref.Index()).left, remainingDepth-1, leftPos, absoluteDepth+1, bitmap, hashes) + s.serializeSubtree(s.getInternal(ref.Index()).right, remainingDepth-1, rightPos, absoluteDepth+1, bitmap, hashes) + case kindEmpty: + return + default: + // StemNode or HashedNode encountered before reaching the group's bottom + // layer. Compute the leaf bitmap position where this node's hash will + // be stored. + leafPos := position + switch ref.Kind() { + case kindStem: + sn := s.getStem(ref.Index()) + // Extend position using the stem's key bits so that + // GetValuesAtStem traversal (which follows key bits) finds the hash. + for d := 0; d < remainingDepth; d++ { + bit := sn.Stem[(absoluteDepth+d)/8] >> (7 - ((absoluteDepth + d) % 8)) & 1 + leafPos = leafPos*2 + int(bit) + } + default: + // HashedNode or unknown: extend all-left (no key bits available). + // This matches the all-zero path that resolveNode would follow. + leafPos = position << remainingDepth + } + bitmap[leafPos/8] |= 1 << (7 - (leafPos % 8)) + *hashes = append(*hashes, s.computeHash(ref)) + } +} + +// serializeNode serializes a node into its on-disk format; internal nodes use the grouped layout (type, group depth, bitmap, hashes). 
+func (s *nodeStore) serializeNode(ref nodeRef, groupDepth int) []byte { + switch ref.Kind() { + case kindInternal: + // InternalNode group: 1 byte type + 1 byte group depth + variable bitmap + N×32 byte hashes + bitmapSize := bitmapSizeForDepth(groupDepth) + bitmap := make([]byte, bitmapSize) + var hashes []common.Hash + node := s.getInternal(ref.Index()) - var serialized [NodeTypeBytes + HashSize + HashSize]byte + s.serializeSubtree(ref, groupDepth, 0, int(node.depth), bitmap, &hashes) + + // Build serialized output + serializedLen := NodeTypeBytes + 1 + bitmapSize + len(hashes)*HashSize + serialized := make([]byte, serializedLen) serialized[0] = nodeTypeInternal - lh := s.computeHash(node.left) - rh := s.computeHash(node.right) - copy(serialized[NodeTypeBytes:NodeTypeBytes+HashSize], lh[:]) - copy(serialized[NodeTypeBytes+HashSize:], rh[:]) - return serialized[:] + serialized[1] = byte(groupDepth) // group depth => bitmap size for a sparse group + copy(serialized[2:2+bitmapSize], bitmap) + + offset := NodeTypeBytes + 1 + bitmapSize + for _, h := range hashes { + copy(serialized[offset:offset+HashSize], h.Bytes()) + offset += HashSize + } + + return serialized case kindStem: sn := s.getStem(ref.Index()) @@ -163,6 +228,59 @@ func (s *nodeStore) deserializeNodeWithHash(serialized []byte, depth int, hn com return s.decodeNode(serialized, depth, hn, false, false) } +// deserializeSubtree reconstructs an InternalNode subtree from grouped serialization. +// remainingDepth is how many more levels to build, position is current index in the bitmap, +// nodeDepth is the actual trie depth for the node being created. +// hashIdx tracks the current position in the hash data (incremented as hashes are consumed). 
+func (s *nodeStore) deserializeSubtree(hn common.Hash, remainingDepth int, position int, nodeDepth int, bitmap []byte, hashData []byte, hashIdx *int, mustRecompute bool, dirty bool) (nodeRef, error) { + if remainingDepth == 0 { + // Bottom layer: check bitmap and return HashedNode or Empty + if bitmap[position/8]>>(7-(position%8))&1 == 1 { + if len(hashData) < (*hashIdx+1)*HashSize { + return emptyRef, errInvalidSerializedLength + } + hash := common.BytesToHash(hashData[*hashIdx*HashSize : (*hashIdx+1)*HashSize]) + *hashIdx++ + return s.newHashedRef(hash), nil + } + return emptyRef, nil + } + + // Positions of the left and right children in the next level of the group; emptiness is decided once both have been decoded + leftPos := position * 2 + rightPos := position*2 + 1 + + // note that the parent might not need root computations, but the children + // do, because their hash isn't saved. Hence `mustRecompute` is set to `true`. + left, err := s.deserializeSubtree(common.Hash{}, remainingDepth-1, leftPos, nodeDepth+1, bitmap, hashData, hashIdx, true, dirty) + if err != nil { + return emptyRef, err + } + right, err := s.deserializeSubtree(common.Hash{}, remainingDepth-1, rightPos, nodeDepth+1, bitmap, hashData, hashIdx, true, dirty) + if err != nil { + return emptyRef, err + } + + // If both children are empty, return Empty + if left.IsEmpty() && right.IsEmpty() { + return emptyRef, nil + } + + ref := s.newInternalRef(nodeDepth) + node := s.getInternal(ref.Index()) + node.left = left + node.right = right + node.mustRecompute = mustRecompute + if !mustRecompute { + // mustRecompute will only be false for the root of the subtree, + // for which we already know the hash. 
+ node.hash = hn + node.mustRecompute = false + } + node.dirty = dirty + return ref, nil +} + func (s *nodeStore) decodeNode(serialized []byte, depth int, hn common.Hash, mustRecompute, dirty bool) (nodeRef, error) { if len(serialized) == 0 { return emptyRef, nil @@ -170,31 +288,23 @@ func (s *nodeStore) decodeNode(serialized []byte, depth int, hn common.Hash, mus switch serialized[0] { case nodeTypeInternal: - if len(serialized) != NodeTypeBytes+2*HashSize { + // Grouped format: 1 byte type + 1 byte group depth + variable bitmap + N×32 byte hashes + if len(serialized) < NodeTypeBytes+1 { return emptyRef, errInvalidSerializedLength } - var leftHash, rightHash common.Hash - copy(leftHash[:], serialized[NodeTypeBytes:NodeTypeBytes+HashSize]) - copy(rightHash[:], serialized[NodeTypeBytes+HashSize:]) + groupDepth := int(serialized[1]) + if groupDepth < 1 || groupDepth > MaxGroupDepth { + return 0, errors.New("invalid group depth") + } + bitmapSize := bitmapSizeForDepth(groupDepth) + if len(serialized) < NodeTypeBytes+1+bitmapSize { + return 0, errInvalidSerializedLength + } + bitmap := serialized[2 : 2+bitmapSize] + hashData := serialized[2+bitmapSize:] - var leftRef, rightRef nodeRef - if leftHash != (common.Hash{}) { - leftRef = s.newHashedRef(leftHash) - } - if rightHash != (common.Hash{}) { - rightRef = s.newHashedRef(rightHash) - } - - ref := s.newInternalRef(depth) - node := s.getInternal(ref.Index()) - node.left = leftRef - node.right = rightRef - if !mustRecompute { - node.hash = hn - node.mustRecompute = false - } - node.dirty = dirty - return ref, nil + hashIdx := 0 + return s.deserializeSubtree(hn, groupDepth, 0, depth, bitmap, hashData, &hashIdx, mustRecompute, dirty) case nodeTypeStem: if len(serialized) < NodeTypeBytes+StemSize+StemBitmapSize { @@ -230,45 +340,112 @@ func (s *nodeStore) decodeNode(serialized []byte, depth int, hn common.Hash, mus // CollectNodes flushes every node that needs flushing via flushfn in post-order. 
// Invariant: any ancestor of a node that needs flushing is itself marked, so a // clean root means the whole subtree is clean. -func (s *nodeStore) collectNodes(ref nodeRef, path []byte, flushfn nodeFlushFn) error { +func (s *nodeStore) collectNodes(ref nodeRef, path []byte, flushfn nodeFlushFn, groupDepth int) { switch ref.Kind() { - case kindEmpty: - return nil case kindInternal: node := s.getInternal(ref.Index()) if !node.dirty { - return nil + return } - // Reuse path buffer across children: flushfn consumers - // (NodeSet.AddNode, tracer.Get) clone via string(path), so in-place - // mutation is safe. - path = append(path, 0) - if err := s.collectNodes(node.left, path, flushfn); err != nil { - return err + // Only flush at group boundaries (depth % groupDepth == 0) + if int(node.depth)%groupDepth == 0 { + // We're at a group boundary - first collect any nodes in deeper groups, + // then flush this group + s.collectChildGroups(node, path, flushfn, groupDepth, groupDepth-1) + flushfn(path, s.computeHash(ref), s.serializeNode(ref, groupDepth)) + node.dirty = false + return } - path[len(path)-1] = 1 - if err := s.collectNodes(node.right, path, flushfn); err != nil { - return err - } - path = path[:len(path)-1] - flushfn(path, s.computeHash(ref), s.serializeNode(ref)) - node.dirty = false - return nil + // Not at a group boundary - this shouldn't happen if we're called correctly from root + // but handle it by continuing to traverse + s.collectChildGroups(node, path, flushfn, groupDepth, groupDepth-(int(node.depth)%groupDepth)-1) case kindStem: sn := s.getStem(ref.Index()) if !sn.dirty { - return nil + return } - flushfn(path, s.computeHash(ref), s.serializeNode(ref)) + flushfn(path, s.computeHash(ref), s.serializeNode(ref, groupDepth)) sn.dirty = false - return nil - case kindHashed: - return nil // Already committed + case kindHashed, kindEmpty: default: - return fmt.Errorf("CollectNodes: unexpected kind %d", ref.Kind()) + panic(fmt.Sprintf("CollectNodes: 
unexpected kind %d", ref.Kind())) } } +// collectChildGroups traverses within a group to find and collect nodes in the next group. +// remainingLevels is how many more levels below the current node until we reach the group boundary. +// When remainingLevels=0, the current node's children are at the next group boundary. +func (s *nodeStore) collectChildGroups(node *InternalNode, path []byte, flushfn nodeFlushFn, groupDepth int, remainingLevels int) error { + if remainingLevels == 0 { + // Current node is at depth (groupBoundary - 1), its children are at the next group boundary + if !node.left.IsEmpty() { + s.collectNodes(node.left, appendBit(path, 0), flushfn, groupDepth) + } + if !node.right.IsEmpty() { + s.collectNodes(node.right, appendBit(path, 1), flushfn, groupDepth) + } + return nil + } + + if !node.left.IsEmpty() { + switch node.left.Kind() { + case kindInternal: + n := s.getInternal(node.left.Index()) + if err := s.collectChildGroups(n, appendBit(path, 0), flushfn, groupDepth, remainingLevels-1); err != nil { + return err + } + default: + extPath := s.extendPathToGroupLeaf(appendBit(path, 0), node.left, remainingLevels) + s.collectNodes(node.left, extPath, flushfn, groupDepth) + } + } + if !node.right.IsEmpty() { + switch node.right.Kind() { + case kindInternal: + n := s.getInternal(node.right.Index()) + if err := s.collectChildGroups(n, appendBit(path, 1), flushfn, groupDepth, remainingLevels-1); err != nil { + return err + } + default: + extPath := s.extendPathToGroupLeaf(appendBit(path, 1), node.right, remainingLevels) + s.collectNodes(node.right, extPath, flushfn, groupDepth) + } + } + return nil +} + +// extendPathToGroupLeaf extends a storage path to the group's leaf boundary, +// matching the projection done by serializeSubtree. For StemNodes, the path +// is extended using the stem's key bits (same as serializeSubtree). For other +// node types, the path is extended with all-zero (left) bits. 
+func (s *nodeStore) extendPathToGroupLeaf(path []byte, node nodeRef, remainingLevels int) []byte { + if remainingLevels <= 0 { + return path + } + if node.Kind() == kindStem { + sn := s.getStem(node.Index()) + for _ = range remainingLevels { + bit := sn.Stem[len(path)/8] >> (7 - (len(path) % 8)) & 1 + path = appendBit(path, bit) + } + } else { + // HashedNode or other: all-left extension (matches serializeSubtree's + // position << remainingDepth behavior). + for _ = range remainingLevels { + path = appendBit(path, 0) + } + } + return path +} + +// appendBit appends a bit to a path, returning a new slice +func appendBit(path []byte, bit byte) []byte { + var p [256]byte + copy(p[:], path) + result := p[:len(path)] + return append(result, bit) +} + func (s *nodeStore) toDot(ref nodeRef, parent, path string) string { switch ref.Kind() { case kindInternal: diff --git a/trie/bintrie/trie.go b/trie/bintrie/trie.go index 8c69e0aa00..e3436e3df1 100644 --- a/trie/bintrie/trie.go +++ b/trie/bintrie/trie.go @@ -107,9 +107,14 @@ func ChunkifyCode(code []byte) ChunkedCode { // BinaryTrie is the implementation of https://eips.ethereum.org/EIPS/eip-7864. type BinaryTrie struct { - store *nodeStore - reader *trie.Reader - tracer *trie.PrevalueTracer + store *nodeStore + reader *trie.Reader + tracer *trie.PrevalueTracer + groupDepth int // Number of levels per serialized group (1-8, default 8) +} + +func (t *BinaryTrie) GroupDepth() int { + return t.groupDepth } // ToDot converts the binary trie to a DOT language representation. Useful for debugging. @@ -119,15 +124,20 @@ func (t *BinaryTrie) ToDot() string { } // NewBinaryTrie creates a new binary trie. -func NewBinaryTrie(root common.Hash, db database.NodeDatabase) (*BinaryTrie, error) { +// groupDepth specifies the number of levels per serialized group (1-8). 
+func NewBinaryTrie(root common.Hash, db database.NodeDatabase, groupDepth int) (*BinaryTrie, error) { + if groupDepth < 1 || groupDepth > MaxGroupDepth { + panic("invalid group depth size") + } reader, err := trie.NewReader(root, common.Hash{}, db) if err != nil { return nil, err } t := &BinaryTrie{ - store: newNodeStore(), - reader: reader, - tracer: trie.NewPrevalueTracer(), + store: newNodeStore(), + reader: reader, + tracer: trie.NewPrevalueTracer(), + groupDepth: groupDepth, } // Parse the root node if it's not empty if root != types.EmptyBinaryHash && root != types.EmptyRootHash { @@ -312,12 +322,9 @@ func (t *BinaryTrie) Commit(_ bool) (common.Hash, *trienode.NodeSet) { // Pre-size the path buffer: collectNodes reuses it in-place via // append/truncate; 32 covers typical binary-trie depth without regrowth. pathBuf := make([]byte, 0, 32) - err := t.store.collectNodes(t.store.root, pathBuf, func(path []byte, hash common.Hash, serialized []byte) { + t.store.collectNodes(t.store.root, pathBuf, func(path []byte, hash common.Hash, serialized []byte) { nodeset.AddNode(path, trienode.NewNodeWithPrev(hash, serialized, t.tracer.Get(path))) - }) - if err != nil { - panic(fmt.Errorf("CollectNodes failed: %v", err)) - } + }, t.groupDepth) return t.Hash(), nodeset } @@ -341,9 +348,10 @@ func (t *BinaryTrie) Prove(key []byte, proofDb ethdb.KeyValueWriter) error { // Copy creates a deep copy of the trie. func (t *BinaryTrie) Copy() *BinaryTrie { return &BinaryTrie{ - store: t.store.Copy(), - reader: t.reader, - tracer: t.tracer.Copy(), + store: t.store.Copy(), + reader: t.reader, + tracer: t.tracer.Copy(), + groupDepth: t.groupDepth, } } diff --git a/trie/bintrie/trie_test.go b/trie/bintrie/trie_test.go index 73aacb76c4..8b7d9e46d6 100644 --- a/trie/bintrie/trie_test.go +++ b/trie/bintrie/trie_test.go @@ -768,8 +768,9 @@ func TestGetStorageNonMembershipInternalRoot(t *testing.T) { // flushes only the root-to-leaf path. 
func TestCommitSkipCleanSubtrees(t *testing.T) { tr := &BinaryTrie{ - store: newNodeStore(), - tracer: trie.NewPrevalueTracer(), + store: newNodeStore(), + tracer: trie.NewPrevalueTracer(), + groupDepth: 1, } const n = 200 key := func(i int) [HashSize]byte { diff --git a/triedb/database.go b/triedb/database.go index 533097c9e3..0fd3e1aa91 100644 --- a/triedb/database.go +++ b/triedb/database.go @@ -31,12 +31,15 @@ import ( // Config defines all necessary options for database. type Config struct { - Preimages bool // Flag whether the preimage of node key is recorded - IsUBT bool // Flag whether the db is holding a verkle tree - HashDB *hashdb.Config // Configs for hash-based scheme - PathDB *pathdb.Config // Configs for experimental path-based scheme + Preimages bool // Flag whether the preimage of node key is recorded + IsUBT bool // Flag whether the db is holding a unified binary tree + BinTrieGroupDepth int // Number of levels per serialized group in binary trie (1-8, see DefaultBinTrieGroupDepth) + HashDB *hashdb.Config // Configs for hash-based scheme + PathDB *pathdb.Config // Configs for experimental path-based scheme } +const DefaultBinTrieGroupDepth = 5 + // HashDefaults represents a config for using hash-based scheme with // default settings. var HashDefaults = &Config{ @@ -45,12 +48,13 @@ var HashDefaults = &Config{ HashDB: hashdb.Defaults, } -// UBTDefaults represents a config for holding verkle trie data +// UBTDefaults represents a config for holding unified binary trie data // using path-based scheme with default settings.
var UBTDefaults = &Config{ - Preimages: false, - IsUBT: true, - PathDB: pathdb.Defaults, + Preimages: false, + IsUBT: true, + BinTrieGroupDepth: DefaultBinTrieGroupDepth, + PathDB: pathdb.Defaults, } // backend defines the methods needed to access/update trie nodes in different @@ -393,3 +397,7 @@ func (db *Database) SnapshotCompleted() bool { } return pdb.SnapshotCompleted() } + +func (db *Database) BinTrieGroupDepth() int { + return db.config.BinTrieGroupDepth +} diff --git a/triedb/pathdb/journal.go b/triedb/pathdb/journal.go index efcc3f2549..657fbbff27 100644 --- a/triedb/pathdb/journal.go +++ b/triedb/pathdb/journal.go @@ -161,7 +161,19 @@ func loadGenerator(db ethdb.KeyValueReader, hash nodeHasher) (*journalGenerator, // loadLayers loads a pre-existing state layer backed by a key-value store. func (db *Database) loadLayers() layer { // Retrieve the root node of persistent state. - root, err := db.hasher(rawdb.ReadAccountTrieNode(db.diskdb, nil)) + var ( + root common.Hash + err error + ) + if db.isUBT { + root = rawdb.ReadSnapshotRoot(db.diskdb) + if root == (common.Hash{}) { + root = types.EmptyBinaryHash + } + } else { + blob := rawdb.ReadAccountTrieNode(db.diskdb, nil) + root, err = db.hasher(blob) + } if err != nil { log.Crit("Failed to compute node hash", "err", err) }