From 58557cb4635d4e6f3e49fcdc82a6469554e929a6 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet <3272758+gballet@users.noreply.github.com> Date: Thu, 9 Apr 2026 10:27:19 +0200 Subject: [PATCH] cmd/geth: add subcommand for offline binary tree conversion (#33740) This tool is designed for the offline translation of an MPT database to a binary trie. This is to be used for users who e.g. want to prove equivalence of a binary tree chain shadowing the MPT chain. It adds a `bintrie` command, cleanly separating the concerns. --- cmd/geth/bintrie_convert.go | 408 +++++++++++++++++++++++++++++++ cmd/geth/bintrie_convert_test.go | 229 +++++++++++++++++ cmd/geth/main.go | 2 + 3 files changed, 639 insertions(+) create mode 100644 cmd/geth/bintrie_convert.go create mode 100644 cmd/geth/bintrie_convert_test.go diff --git a/cmd/geth/bintrie_convert.go b/cmd/geth/bintrie_convert.go new file mode 100644 index 0000000000..3730768697 --- /dev/null +++ b/cmd/geth/bintrie_convert.go @@ -0,0 +1,408 @@ +// Copyright 2026 The go-ethereum Authors +// This file is part of go-ethereum. +// +// go-ethereum is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-ethereum is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-ethereum. If not, see <http://www.gnu.org/licenses/>.
+ +package main + +import ( + "errors" + "fmt" + "runtime" + "runtime/debug" + "slices" + "time" + + "github.com/ethereum/go-ethereum/cmd/utils" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/bintrie" + "github.com/ethereum/go-ethereum/trie/trienode" + "github.com/ethereum/go-ethereum/triedb" + "github.com/ethereum/go-ethereum/triedb/pathdb" + "github.com/urfave/cli/v2" +) + +var ( + deleteSourceFlag = &cli.BoolFlag{ + Name: "delete-source", + Usage: "Delete MPT trie nodes after conversion", + } + memoryLimitFlag = &cli.Uint64Flag{ + Name: "memory-limit", + Usage: "Max heap allocation in MB before forcing a commit cycle", + Value: 16384, + } + + bintrieCommand = &cli.Command{ + Name: "bintrie", + Usage: "A set of commands for binary trie operations", + Description: "", + Subcommands: []*cli.Command{ + { + Name: "convert", + Usage: "Convert MPT state to binary trie", + ArgsUsage: "[state-root]", + Action: convertToBinaryTrie, + Flags: slices.Concat([]cli.Flag{ + deleteSourceFlag, + memoryLimitFlag, + }, utils.NetworkFlags, utils.DatabaseFlags), + Description: ` +geth bintrie convert [--delete-source] [--memory-limit MB] [state-root] + +Reads all state from the Merkle Patricia Trie and writes it into a Binary Trie, +operating offline. Memory-safe via periodic commit-and-reload cycles. + +The optional state-root argument specifies which state root to convert. +If omitted, the head block's state root is used. 
+ +Flags: + --delete-source Delete MPT trie nodes after successful conversion + --memory-limit Max heap allocation in MB before forcing a commit (default: 16384) +`, + }, + }, + } +) + +type conversionStats struct { + accounts uint64 + slots uint64 + codes uint64 + commits uint64 + start time.Time + lastReport time.Time + lastMemChk time.Time +} + +func (s *conversionStats) report(force bool) { + if !force && time.Since(s.lastReport) < 8*time.Second { + return + } + elapsed := time.Since(s.start).Seconds() + acctRate := float64(0) + if elapsed > 0 { + acctRate = float64(s.accounts) / elapsed + } + log.Info("Conversion progress", + "accounts", s.accounts, + "slots", s.slots, + "codes", s.codes, + "commits", s.commits, + "accounts/sec", fmt.Sprintf("%.0f", acctRate), + "elapsed", common.PrettyDuration(time.Since(s.start)), + ) + s.lastReport = time.Now() +} + +func convertToBinaryTrie(ctx *cli.Context) error { + if ctx.NArg() > 1 { + return errors.New("too many arguments") + } + stack, _ := makeConfigNode(ctx) + defer stack.Close() + + chaindb := utils.MakeChainDatabase(ctx, stack, false) + defer chaindb.Close() + + headBlock := rawdb.ReadHeadBlock(chaindb) + if headBlock == nil { + return errors.New("no head block found") + } + var ( + root common.Hash + err error + ) + if ctx.NArg() == 1 { + root, err = parseRoot(ctx.Args().First()) + if err != nil { + return fmt.Errorf("invalid state root: %w", err) + } + } else { + root = headBlock.Root() + } + log.Info("Starting MPT to binary trie conversion", "root", root, "block", headBlock.NumberU64()) + + srcTriedb := utils.MakeTrieDatabase(ctx, stack, chaindb, true, true, false) + defer srcTriedb.Close() + + destTriedb := triedb.NewDatabase(chaindb, &triedb.Config{ + IsVerkle: true, + PathDB: &pathdb.Config{ + JournalDirectory: stack.ResolvePath("triedb-bintrie"), + }, + }) + defer destTriedb.Close() + + binTrie, err := bintrie.NewBinaryTrie(types.EmptyBinaryHash, destTriedb) + if err != nil { + return fmt.Errorf("failed to 
create binary trie: %w", err) + } + memLimit := ctx.Uint64(memoryLimitFlag.Name) * 1024 * 1024 + + currentRoot, err := runConversionLoop(chaindb, srcTriedb, destTriedb, binTrie, root, memLimit) + if err != nil { + return err + } + log.Info("Conversion complete", "binaryRoot", currentRoot) + + if ctx.Bool(deleteSourceFlag.Name) { + log.Info("Deleting source MPT data") + if err := deleteMPTData(chaindb, srcTriedb, root); err != nil { + return fmt.Errorf("MPT deletion failed: %w", err) + } + log.Info("Source MPT data deleted") + } + return nil +} + +func runConversionLoop(chaindb ethdb.Database, srcTriedb *triedb.Database, destTriedb *triedb.Database, binTrie *bintrie.BinaryTrie, root common.Hash, memLimit uint64) (common.Hash, error) { + currentRoot := types.EmptyBinaryHash + stats := &conversionStats{ + start: time.Now(), + lastReport: time.Now(), + lastMemChk: time.Now(), + } + + srcTrie, err := trie.NewStateTrie(trie.StateTrieID(root), srcTriedb) + if err != nil { + return common.Hash{}, fmt.Errorf("failed to open source trie: %w", err) + } + acctIt, err := srcTrie.NodeIterator(nil) + if err != nil { + return common.Hash{}, fmt.Errorf("failed to create account iterator: %w", err) + } + accIter := trie.NewIterator(acctIt) + + for accIter.Next() { + var acc types.StateAccount + if err := rlp.DecodeBytes(accIter.Value, &acc); err != nil { + return common.Hash{}, fmt.Errorf("invalid account RLP: %w", err) + } + addrBytes := srcTrie.GetKey(accIter.Key) + if addrBytes == nil { + return common.Hash{}, fmt.Errorf("missing preimage for account hash %x (run with --cache.preimages)", accIter.Key) + } + addr := common.BytesToAddress(addrBytes) + + var code []byte + codeHash := common.BytesToHash(acc.CodeHash) + if codeHash != types.EmptyCodeHash { + code = rawdb.ReadCode(chaindb, codeHash) + if code == nil { + return common.Hash{}, fmt.Errorf("missing code for hash %x (account %x)", codeHash, addr) + } + stats.codes++ + } + + if err := binTrie.UpdateAccount(addr, &acc, 
len(code)); err != nil { + return common.Hash{}, fmt.Errorf("failed to update account %x: %w", addr, err) + } + if len(code) > 0 { + if err := binTrie.UpdateContractCode(addr, codeHash, code); err != nil { + return common.Hash{}, fmt.Errorf("failed to update code for %x: %w", addr, err) + } + } + + if acc.Root != types.EmptyRootHash { + addrHash := common.BytesToHash(accIter.Key) + storageTrie, err := trie.NewStateTrie(trie.StorageTrieID(root, addrHash, acc.Root), srcTriedb) + if err != nil { + return common.Hash{}, fmt.Errorf("failed to open storage trie for %x: %w", addr, err) + } + storageNodeIt, err := storageTrie.NodeIterator(nil) + if err != nil { + return common.Hash{}, fmt.Errorf("failed to create storage iterator for %x: %w", addr, err) + } + storageIter := trie.NewIterator(storageNodeIt) + + slotCount := uint64(0) + for storageIter.Next() { + slotKey := storageTrie.GetKey(storageIter.Key) + if slotKey == nil { + return common.Hash{}, fmt.Errorf("missing preimage for storage key %x (account %x)", storageIter.Key, addr) + } + _, content, _, err := rlp.Split(storageIter.Value) + if err != nil { + return common.Hash{}, fmt.Errorf("invalid storage RLP for key %x (account %x): %w", slotKey, addr, err) + } + if err := binTrie.UpdateStorage(addr, slotKey, content); err != nil { + return common.Hash{}, fmt.Errorf("failed to update storage %x/%x: %w", addr, slotKey, err) + } + stats.slots++ + slotCount++ + + if slotCount%10000 == 0 { + binTrie, currentRoot, err = maybeCommit(binTrie, currentRoot, destTriedb, memLimit, stats) + if err != nil { + return common.Hash{}, err + } + } + } + if storageIter.Err != nil { + return common.Hash{}, fmt.Errorf("storage iteration error for %x: %w", addr, storageIter.Err) + } + } + stats.accounts++ + stats.report(false) + + if stats.accounts%1000 == 0 { + binTrie, currentRoot, err = maybeCommit(binTrie, currentRoot, destTriedb, memLimit, stats) + if err != nil { + return common.Hash{}, err + } + } + } + if accIter.Err != nil { + 
return common.Hash{}, fmt.Errorf("account iteration error: %w", accIter.Err) + } + + _, currentRoot, err = commitBinaryTrie(binTrie, currentRoot, destTriedb) + if err != nil { + return common.Hash{}, fmt.Errorf("final commit failed: %w", err) + } + stats.commits++ + stats.report(true) + return currentRoot, nil +} + +func maybeCommit(bt *bintrie.BinaryTrie, currentRoot common.Hash, destDB *triedb.Database, memLimit uint64, stats *conversionStats) (*bintrie.BinaryTrie, common.Hash, error) { + if time.Since(stats.lastMemChk) < 5*time.Second { + return bt, currentRoot, nil + } + stats.lastMemChk = time.Now() + + var m runtime.MemStats + runtime.ReadMemStats(&m) + if m.Alloc < memLimit { + return bt, currentRoot, nil + } + log.Info("Memory limit reached, committing", "alloc", common.StorageSize(m.Alloc), "limit", common.StorageSize(memLimit)) + + bt, currentRoot, err := commitBinaryTrie(bt, currentRoot, destDB) + if err != nil { + return nil, common.Hash{}, err + } + stats.commits++ + stats.report(true) + return bt, currentRoot, nil +} + +func commitBinaryTrie(bt *bintrie.BinaryTrie, currentRoot common.Hash, destDB *triedb.Database) (*bintrie.BinaryTrie, common.Hash, error) { + newRoot, nodeSet := bt.Commit(false) + if nodeSet != nil { + merged := trienode.NewWithNodeSet(nodeSet) + if err := destDB.Update(newRoot, currentRoot, 0, merged, triedb.NewStateSet()); err != nil { + return nil, common.Hash{}, fmt.Errorf("triedb update failed: %w", err) + } + if err := destDB.Commit(newRoot, false); err != nil { + return nil, common.Hash{}, fmt.Errorf("triedb commit failed: %w", err) + } + } + runtime.GC() + debug.FreeOSMemory() + + bt, err := bintrie.NewBinaryTrie(newRoot, destDB) + if err != nil { + return nil, common.Hash{}, fmt.Errorf("failed to reload binary trie: %w", err) + } + return bt, newRoot, nil +} + +func deleteMPTData(chaindb ethdb.Database, srcTriedb *triedb.Database, root common.Hash) error { + isPathDB := srcTriedb.Scheme() == rawdb.PathScheme + + srcTrie, err 
:= trie.NewStateTrie(trie.StateTrieID(root), srcTriedb) + if err != nil { + return fmt.Errorf("failed to open source trie for deletion: %w", err) + } + acctIt, err := srcTrie.NodeIterator(nil) + if err != nil { + return fmt.Errorf("failed to create account iterator for deletion: %w", err) + } + batch := chaindb.NewBatch() + deleted := 0 + + for acctIt.Next(true) { + if isPathDB { + rawdb.DeleteAccountTrieNode(batch, acctIt.Path()) + } else { + node := acctIt.Hash() + if node != (common.Hash{}) { + rawdb.DeleteLegacyTrieNode(batch, node) + } + } + deleted++ + + if acctIt.Leaf() { + var acc types.StateAccount + if err := rlp.DecodeBytes(acctIt.LeafBlob(), &acc); err != nil { + return fmt.Errorf("invalid account during deletion: %w", err) + } + if acc.Root != types.EmptyRootHash { + addrHash := common.BytesToHash(acctIt.LeafKey()) + storageTrie, err := trie.NewStateTrie(trie.StorageTrieID(root, addrHash, acc.Root), srcTriedb) + if err != nil { + return fmt.Errorf("failed to open storage trie for deletion: %w", err) + } + storageIt, err := storageTrie.NodeIterator(nil) + if err != nil { + return fmt.Errorf("failed to create storage iterator for deletion: %w", err) + } + for storageIt.Next(true) { + if isPathDB { + rawdb.DeleteStorageTrieNode(batch, addrHash, storageIt.Path()) + } else { + node := storageIt.Hash() + if node != (common.Hash{}) { + rawdb.DeleteLegacyTrieNode(batch, node) + } + } + deleted++ + if batch.ValueSize() >= ethdb.IdealBatchSize { + if err := batch.Write(); err != nil { + return fmt.Errorf("batch write failed: %w", err) + } + batch.Reset() + } + } + if storageIt.Error() != nil { + return fmt.Errorf("storage deletion iterator error: %w", storageIt.Error()) + } + } + } + if batch.ValueSize() >= ethdb.IdealBatchSize { + if err := batch.Write(); err != nil { + return fmt.Errorf("batch write failed: %w", err) + } + batch.Reset() + } + } + if acctIt.Error() != nil { + return fmt.Errorf("account deletion iterator error: %w", acctIt.Error()) + } + if 
batch.ValueSize() > 0 { + if err := batch.Write(); err != nil { + return fmt.Errorf("final batch write failed: %w", err) + } + } + log.Info("MPT deletion complete", "nodesDeleted", deleted) + return nil +} diff --git a/cmd/geth/bintrie_convert_test.go b/cmd/geth/bintrie_convert_test.go new file mode 100644 index 0000000000..9b95f6a70f --- /dev/null +++ b/cmd/geth/bintrie_convert_test.go @@ -0,0 +1,229 @@ +// Copyright 2026 The go-ethereum Authors +// This file is part of go-ethereum. +// +// go-ethereum is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-ethereum is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-ethereum. If not, see <http://www.gnu.org/licenses/>.
+ +package main + +import ( + "math" + "math/big" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/params" + "github.com/ethereum/go-ethereum/trie/bintrie" + "github.com/ethereum/go-ethereum/triedb" + "github.com/ethereum/go-ethereum/triedb/pathdb" + "github.com/holiman/uint256" +) + +func TestBintrieConvert(t *testing.T) { + var ( + addr1 = common.HexToAddress("0x1111111111111111111111111111111111111111") + addr2 = common.HexToAddress("0x2222222222222222222222222222222222222222") + slotKey1 = common.HexToHash("0x01") + slotKey2 = common.HexToHash("0x02") + slotVal1 = common.HexToHash("0xdeadbeef") + slotVal2 = common.HexToHash("0xcafebabe") + code = []byte{0x60, 0x42, 0x60, 0x00, 0x52, 0x60, 0x20, 0x60, 0x00, 0xf3} + ) + + chaindb := rawdb.NewMemoryDatabase() + + srcTriedb := triedb.NewDatabase(chaindb, &triedb.Config{ + Preimages: true, + PathDB: pathdb.Defaults, + }) + + gspec := &core.Genesis{ + Config: params.TestChainConfig, + BaseFee: big.NewInt(params.InitialBaseFee), + Alloc: types.GenesisAlloc{ + addr1: { + Balance: big.NewInt(1000000), + Nonce: 5, + }, + addr2: { + Balance: big.NewInt(2000000), + Nonce: 10, + Code: code, + Storage: map[common.Hash]common.Hash{ + slotKey1: slotVal1, + slotKey2: slotVal2, + }, + }, + }, + } + + genesisBlock := gspec.MustCommit(chaindb, srcTriedb) + root := genesisBlock.Root() + t.Logf("Genesis root: %x", root) + srcTriedb.Close() + + srcTriedb2 := triedb.NewDatabase(chaindb, &triedb.Config{ + Preimages: true, + PathDB: &pathdb.Config{ReadOnly: true}, + }) + defer srcTriedb2.Close() + + destTriedb := triedb.NewDatabase(chaindb, &triedb.Config{ + IsVerkle: true, + PathDB: pathdb.Defaults, + }) + defer destTriedb.Close() + + bt, err := bintrie.NewBinaryTrie(types.EmptyBinaryHash, destTriedb) + if err != nil { + t.Fatalf("failed to create binary trie: 
%v", err) + } + + currentRoot, err := runConversionLoop(chaindb, srcTriedb2, destTriedb, bt, root, math.MaxUint64) + if err != nil { + t.Fatalf("conversion failed: %v", err) + } + t.Logf("Binary trie root: %x", currentRoot) + + bt2, err := bintrie.NewBinaryTrie(currentRoot, destTriedb) + if err != nil { + t.Fatalf("failed to reload binary trie: %v", err) + } + + acc1, err := bt2.GetAccount(addr1) + if err != nil { + t.Fatalf("failed to get account1: %v", err) + } + if acc1 == nil { + t.Fatal("account1 not found in binary trie") + } + if acc1.Nonce != 5 { + t.Errorf("account1 nonce: got %d, want 5", acc1.Nonce) + } + wantBal1 := uint256.NewInt(1000000) + if acc1.Balance.Cmp(wantBal1) != 0 { + t.Errorf("account1 balance: got %s, want %s", acc1.Balance, wantBal1) + } + + acc2, err := bt2.GetAccount(addr2) + if err != nil { + t.Fatalf("failed to get account2: %v", err) + } + if acc2 == nil { + t.Fatal("account2 not found in binary trie") + } + if acc2.Nonce != 10 { + t.Errorf("account2 nonce: got %d, want 10", acc2.Nonce) + } + wantBal2 := uint256.NewInt(2000000) + if acc2.Balance.Cmp(wantBal2) != 0 { + t.Errorf("account2 balance: got %s, want %s", acc2.Balance, wantBal2) + } + + treeKey1 := bintrie.GetBinaryTreeKeyStorageSlot(addr2, slotKey1[:]) + val1, err := bt2.GetWithHashedKey(treeKey1) + if err != nil { + t.Fatalf("failed to get storage slot1: %v", err) + } + if len(val1) == 0 { + t.Fatal("storage slot1 not found") + } + got1 := common.BytesToHash(val1) + if got1 != slotVal1 { + t.Errorf("storage slot1: got %x, want %x", got1, slotVal1) + } + + treeKey2 := bintrie.GetBinaryTreeKeyStorageSlot(addr2, slotKey2[:]) + val2, err := bt2.GetWithHashedKey(treeKey2) + if err != nil { + t.Fatalf("failed to get storage slot2: %v", err) + } + if len(val2) == 0 { + t.Fatal("storage slot2 not found") + } + got2 := common.BytesToHash(val2) + if got2 != slotVal2 { + t.Errorf("storage slot2: got %x, want %x", got2, slotVal2) + } +} + +func TestBintrieConvertDeleteSource(t 
*testing.T) { + addr1 := common.HexToAddress("0x3333333333333333333333333333333333333333") + + chaindb := rawdb.NewMemoryDatabase() + + srcTriedb := triedb.NewDatabase(chaindb, &triedb.Config{ + Preimages: true, + PathDB: pathdb.Defaults, + }) + + gspec := &core.Genesis{ + Config: params.TestChainConfig, + BaseFee: big.NewInt(params.InitialBaseFee), + Alloc: types.GenesisAlloc{ + addr1: { + Balance: big.NewInt(1000000), + }, + }, + } + + genesisBlock := gspec.MustCommit(chaindb, srcTriedb) + root := genesisBlock.Root() + srcTriedb.Close() + + srcTriedb2 := triedb.NewDatabase(chaindb, &triedb.Config{ + Preimages: true, + PathDB: &pathdb.Config{ReadOnly: true}, + }) + + destTriedb := triedb.NewDatabase(chaindb, &triedb.Config{ + IsVerkle: true, + PathDB: pathdb.Defaults, + }) + + bt, err := bintrie.NewBinaryTrie(types.EmptyBinaryHash, destTriedb) + if err != nil { + t.Fatalf("failed to create binary trie: %v", err) + } + + newRoot, err := runConversionLoop(chaindb, srcTriedb2, destTriedb, bt, root, math.MaxUint64) + if err != nil { + t.Fatalf("conversion failed: %v", err) + } + + if err := deleteMPTData(chaindb, srcTriedb2, root); err != nil { + t.Fatalf("deletion failed: %v", err) + } + srcTriedb2.Close() + + bt2, err := bintrie.NewBinaryTrie(newRoot, destTriedb) + if err != nil { + t.Fatalf("failed to reload binary trie after deletion: %v", err) + } + + acc, err := bt2.GetAccount(addr1) + if err != nil { + t.Fatalf("failed to get account after deletion: %v", err) + } + if acc == nil { + t.Fatal("account not found after MPT deletion") + } + wantBal := uint256.NewInt(1000000) + if acc.Balance.Cmp(wantBal) != 0 { + t.Errorf("balance after deletion: got %s, want %s", acc.Balance, wantBal) + } + destTriedb.Close() +} diff --git a/cmd/geth/main.go b/cmd/geth/main.go index b72cbb9885..e196ac8688 100644 --- a/cmd/geth/main.go +++ b/cmd/geth/main.go @@ -260,6 +260,8 @@ func init() { utils.ShowDeprecated, // See snapshot.go snapshotCommand, + // See bintrie_convert.go + 
bintrieCommand, } if logTestCommand != nil { app.Commands = append(app.Commands, logTestCommand)