cmd/geth: add subcommand for offline binary tree conversion (#33740)
Some checks are pending
/ Linux Build (push) Waiting to run
/ Linux Build (arm) (push) Waiting to run
/ Keeper Build (push) Waiting to run
/ Windows Build (push) Waiting to run
/ Docker Image (push) Waiting to run

This tool is designed for the offline translation of an MPT database to
a binary trie. This is to be used for users who e.g. want to prove
equivalence of a binary tree chain shadowing the MPT chain.

It adds a `bintrie` command, cleanly separating the concerns.
This commit is contained in:
Guillaume Ballet 2026-04-09 10:27:19 +02:00 committed by GitHub
parent 3772bb536a
commit 58557cb463
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 639 additions and 0 deletions

408
cmd/geth/bintrie_convert.go Normal file
View file

@ -0,0 +1,408 @@
// Copyright 2026 The go-ethereum Authors
// This file is part of go-ethereum.
//
// go-ethereum is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// go-ethereum is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with go-ethereum. If not, see <http://www.gnu.org/licenses/>.
package main
import (
"errors"
"fmt"
"runtime"
"runtime/debug"
"slices"
"time"
"github.com/ethereum/go-ethereum/cmd/utils"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/trie"
"github.com/ethereum/go-ethereum/trie/bintrie"
"github.com/ethereum/go-ethereum/trie/trienode"
"github.com/ethereum/go-ethereum/triedb"
"github.com/ethereum/go-ethereum/triedb/pathdb"
"github.com/urfave/cli/v2"
)
var (
deleteSourceFlag = &cli.BoolFlag{
Name: "delete-source",
Usage: "Delete MPT trie nodes after conversion",
}
memoryLimitFlag = &cli.Uint64Flag{
Name: "memory-limit",
Usage: "Max heap allocation in MB before forcing a commit cycle",
Value: 16384,
}
bintrieCommand = &cli.Command{
Name: "bintrie",
Usage: "A set of commands for binary trie operations",
Description: "",
Subcommands: []*cli.Command{
{
Name: "convert",
Usage: "Convert MPT state to binary trie",
ArgsUsage: "[state-root]",
Action: convertToBinaryTrie,
Flags: slices.Concat([]cli.Flag{
deleteSourceFlag,
memoryLimitFlag,
}, utils.NetworkFlags, utils.DatabaseFlags),
Description: `
geth bintrie convert [--delete-source] [--memory-limit MB] [state-root]
Reads all state from the Merkle Patricia Trie and writes it into a Binary Trie,
operating offline. Memory-safe via periodic commit-and-reload cycles.
The optional state-root argument specifies which state root to convert.
If omitted, the head block's state root is used.
Flags:
--delete-source Delete MPT trie nodes after successful conversion
--memory-limit Max heap allocation in MB before forcing a commit (default: 16384)
`,
},
},
}
)
type conversionStats struct {
accounts uint64
slots uint64
codes uint64
commits uint64
start time.Time
lastReport time.Time
lastMemChk time.Time
}
func (s *conversionStats) report(force bool) {
if !force && time.Since(s.lastReport) < 8*time.Second {
return
}
elapsed := time.Since(s.start).Seconds()
acctRate := float64(0)
if elapsed > 0 {
acctRate = float64(s.accounts) / elapsed
}
log.Info("Conversion progress",
"accounts", s.accounts,
"slots", s.slots,
"codes", s.codes,
"commits", s.commits,
"accounts/sec", fmt.Sprintf("%.0f", acctRate),
"elapsed", common.PrettyDuration(time.Since(s.start)),
)
s.lastReport = time.Now()
}
func convertToBinaryTrie(ctx *cli.Context) error {
if ctx.NArg() > 1 {
return errors.New("too many arguments")
}
stack, _ := makeConfigNode(ctx)
defer stack.Close()
chaindb := utils.MakeChainDatabase(ctx, stack, false)
defer chaindb.Close()
headBlock := rawdb.ReadHeadBlock(chaindb)
if headBlock == nil {
return errors.New("no head block found")
}
var (
root common.Hash
err error
)
if ctx.NArg() == 1 {
root, err = parseRoot(ctx.Args().First())
if err != nil {
return fmt.Errorf("invalid state root: %w", err)
}
} else {
root = headBlock.Root()
}
log.Info("Starting MPT to binary trie conversion", "root", root, "block", headBlock.NumberU64())
srcTriedb := utils.MakeTrieDatabase(ctx, stack, chaindb, true, true, false)
defer srcTriedb.Close()
destTriedb := triedb.NewDatabase(chaindb, &triedb.Config{
IsVerkle: true,
PathDB: &pathdb.Config{
JournalDirectory: stack.ResolvePath("triedb-bintrie"),
},
})
defer destTriedb.Close()
binTrie, err := bintrie.NewBinaryTrie(types.EmptyBinaryHash, destTriedb)
if err != nil {
return fmt.Errorf("failed to create binary trie: %w", err)
}
memLimit := ctx.Uint64(memoryLimitFlag.Name) * 1024 * 1024
currentRoot, err := runConversionLoop(chaindb, srcTriedb, destTriedb, binTrie, root, memLimit)
if err != nil {
return err
}
log.Info("Conversion complete", "binaryRoot", currentRoot)
if ctx.Bool(deleteSourceFlag.Name) {
log.Info("Deleting source MPT data")
if err := deleteMPTData(chaindb, srcTriedb, root); err != nil {
return fmt.Errorf("MPT deletion failed: %w", err)
}
log.Info("Source MPT data deleted")
}
return nil
}
func runConversionLoop(chaindb ethdb.Database, srcTriedb *triedb.Database, destTriedb *triedb.Database, binTrie *bintrie.BinaryTrie, root common.Hash, memLimit uint64) (common.Hash, error) {
currentRoot := types.EmptyBinaryHash
stats := &conversionStats{
start: time.Now(),
lastReport: time.Now(),
lastMemChk: time.Now(),
}
srcTrie, err := trie.NewStateTrie(trie.StateTrieID(root), srcTriedb)
if err != nil {
return common.Hash{}, fmt.Errorf("failed to open source trie: %w", err)
}
acctIt, err := srcTrie.NodeIterator(nil)
if err != nil {
return common.Hash{}, fmt.Errorf("failed to create account iterator: %w", err)
}
accIter := trie.NewIterator(acctIt)
for accIter.Next() {
var acc types.StateAccount
if err := rlp.DecodeBytes(accIter.Value, &acc); err != nil {
return common.Hash{}, fmt.Errorf("invalid account RLP: %w", err)
}
addrBytes := srcTrie.GetKey(accIter.Key)
if addrBytes == nil {
return common.Hash{}, fmt.Errorf("missing preimage for account hash %x (run with --cache.preimages)", accIter.Key)
}
addr := common.BytesToAddress(addrBytes)
var code []byte
codeHash := common.BytesToHash(acc.CodeHash)
if codeHash != types.EmptyCodeHash {
code = rawdb.ReadCode(chaindb, codeHash)
if code == nil {
return common.Hash{}, fmt.Errorf("missing code for hash %x (account %x)", codeHash, addr)
}
stats.codes++
}
if err := binTrie.UpdateAccount(addr, &acc, len(code)); err != nil {
return common.Hash{}, fmt.Errorf("failed to update account %x: %w", addr, err)
}
if len(code) > 0 {
if err := binTrie.UpdateContractCode(addr, codeHash, code); err != nil {
return common.Hash{}, fmt.Errorf("failed to update code for %x: %w", addr, err)
}
}
if acc.Root != types.EmptyRootHash {
addrHash := common.BytesToHash(accIter.Key)
storageTrie, err := trie.NewStateTrie(trie.StorageTrieID(root, addrHash, acc.Root), srcTriedb)
if err != nil {
return common.Hash{}, fmt.Errorf("failed to open storage trie for %x: %w", addr, err)
}
storageNodeIt, err := storageTrie.NodeIterator(nil)
if err != nil {
return common.Hash{}, fmt.Errorf("failed to create storage iterator for %x: %w", addr, err)
}
storageIter := trie.NewIterator(storageNodeIt)
slotCount := uint64(0)
for storageIter.Next() {
slotKey := storageTrie.GetKey(storageIter.Key)
if slotKey == nil {
return common.Hash{}, fmt.Errorf("missing preimage for storage key %x (account %x)", storageIter.Key, addr)
}
_, content, _, err := rlp.Split(storageIter.Value)
if err != nil {
return common.Hash{}, fmt.Errorf("invalid storage RLP for key %x (account %x): %w", slotKey, addr, err)
}
if err := binTrie.UpdateStorage(addr, slotKey, content); err != nil {
return common.Hash{}, fmt.Errorf("failed to update storage %x/%x: %w", addr, slotKey, err)
}
stats.slots++
slotCount++
if slotCount%10000 == 0 {
binTrie, currentRoot, err = maybeCommit(binTrie, currentRoot, destTriedb, memLimit, stats)
if err != nil {
return common.Hash{}, err
}
}
}
if storageIter.Err != nil {
return common.Hash{}, fmt.Errorf("storage iteration error for %x: %w", addr, storageIter.Err)
}
}
stats.accounts++
stats.report(false)
if stats.accounts%1000 == 0 {
binTrie, currentRoot, err = maybeCommit(binTrie, currentRoot, destTriedb, memLimit, stats)
if err != nil {
return common.Hash{}, err
}
}
}
if accIter.Err != nil {
return common.Hash{}, fmt.Errorf("account iteration error: %w", accIter.Err)
}
_, currentRoot, err = commitBinaryTrie(binTrie, currentRoot, destTriedb)
if err != nil {
return common.Hash{}, fmt.Errorf("final commit failed: %w", err)
}
stats.commits++
stats.report(true)
return currentRoot, nil
}
func maybeCommit(bt *bintrie.BinaryTrie, currentRoot common.Hash, destDB *triedb.Database, memLimit uint64, stats *conversionStats) (*bintrie.BinaryTrie, common.Hash, error) {
if time.Since(stats.lastMemChk) < 5*time.Second {
return bt, currentRoot, nil
}
stats.lastMemChk = time.Now()
var m runtime.MemStats
runtime.ReadMemStats(&m)
if m.Alloc < memLimit {
return bt, currentRoot, nil
}
log.Info("Memory limit reached, committing", "alloc", common.StorageSize(m.Alloc), "limit", common.StorageSize(memLimit))
bt, currentRoot, err := commitBinaryTrie(bt, currentRoot, destDB)
if err != nil {
return nil, common.Hash{}, err
}
stats.commits++
stats.report(true)
return bt, currentRoot, nil
}
func commitBinaryTrie(bt *bintrie.BinaryTrie, currentRoot common.Hash, destDB *triedb.Database) (*bintrie.BinaryTrie, common.Hash, error) {
newRoot, nodeSet := bt.Commit(false)
if nodeSet != nil {
merged := trienode.NewWithNodeSet(nodeSet)
if err := destDB.Update(newRoot, currentRoot, 0, merged, triedb.NewStateSet()); err != nil {
return nil, common.Hash{}, fmt.Errorf("triedb update failed: %w", err)
}
if err := destDB.Commit(newRoot, false); err != nil {
return nil, common.Hash{}, fmt.Errorf("triedb commit failed: %w", err)
}
}
runtime.GC()
debug.FreeOSMemory()
bt, err := bintrie.NewBinaryTrie(newRoot, destDB)
if err != nil {
return nil, common.Hash{}, fmt.Errorf("failed to reload binary trie: %w", err)
}
return bt, newRoot, nil
}
func deleteMPTData(chaindb ethdb.Database, srcTriedb *triedb.Database, root common.Hash) error {
isPathDB := srcTriedb.Scheme() == rawdb.PathScheme
srcTrie, err := trie.NewStateTrie(trie.StateTrieID(root), srcTriedb)
if err != nil {
return fmt.Errorf("failed to open source trie for deletion: %w", err)
}
acctIt, err := srcTrie.NodeIterator(nil)
if err != nil {
return fmt.Errorf("failed to create account iterator for deletion: %w", err)
}
batch := chaindb.NewBatch()
deleted := 0
for acctIt.Next(true) {
if isPathDB {
rawdb.DeleteAccountTrieNode(batch, acctIt.Path())
} else {
node := acctIt.Hash()
if node != (common.Hash{}) {
rawdb.DeleteLegacyTrieNode(batch, node)
}
}
deleted++
if acctIt.Leaf() {
var acc types.StateAccount
if err := rlp.DecodeBytes(acctIt.LeafBlob(), &acc); err != nil {
return fmt.Errorf("invalid account during deletion: %w", err)
}
if acc.Root != types.EmptyRootHash {
addrHash := common.BytesToHash(acctIt.LeafKey())
storageTrie, err := trie.NewStateTrie(trie.StorageTrieID(root, addrHash, acc.Root), srcTriedb)
if err != nil {
return fmt.Errorf("failed to open storage trie for deletion: %w", err)
}
storageIt, err := storageTrie.NodeIterator(nil)
if err != nil {
return fmt.Errorf("failed to create storage iterator for deletion: %w", err)
}
for storageIt.Next(true) {
if isPathDB {
rawdb.DeleteStorageTrieNode(batch, addrHash, storageIt.Path())
} else {
node := storageIt.Hash()
if node != (common.Hash{}) {
rawdb.DeleteLegacyTrieNode(batch, node)
}
}
deleted++
if batch.ValueSize() >= ethdb.IdealBatchSize {
if err := batch.Write(); err != nil {
return fmt.Errorf("batch write failed: %w", err)
}
batch.Reset()
}
}
if storageIt.Error() != nil {
return fmt.Errorf("storage deletion iterator error: %w", storageIt.Error())
}
}
}
if batch.ValueSize() >= ethdb.IdealBatchSize {
if err := batch.Write(); err != nil {
return fmt.Errorf("batch write failed: %w", err)
}
batch.Reset()
}
}
if acctIt.Error() != nil {
return fmt.Errorf("account deletion iterator error: %w", acctIt.Error())
}
if batch.ValueSize() > 0 {
if err := batch.Write(); err != nil {
return fmt.Errorf("final batch write failed: %w", err)
}
}
log.Info("MPT deletion complete", "nodesDeleted", deleted)
return nil
}

View file

@ -0,0 +1,229 @@
// Copyright 2026 The go-ethereum Authors
// This file is part of go-ethereum.
//
// go-ethereum is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// go-ethereum is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with go-ethereum. If not, see <http://www.gnu.org/licenses/>.
package main
import (
"math"
"math/big"
"testing"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/params"
"github.com/ethereum/go-ethereum/trie/bintrie"
"github.com/ethereum/go-ethereum/triedb"
"github.com/ethereum/go-ethereum/triedb/pathdb"
"github.com/holiman/uint256"
)
func TestBintrieConvert(t *testing.T) {
var (
addr1 = common.HexToAddress("0x1111111111111111111111111111111111111111")
addr2 = common.HexToAddress("0x2222222222222222222222222222222222222222")
slotKey1 = common.HexToHash("0x01")
slotKey2 = common.HexToHash("0x02")
slotVal1 = common.HexToHash("0xdeadbeef")
slotVal2 = common.HexToHash("0xcafebabe")
code = []byte{0x60, 0x42, 0x60, 0x00, 0x52, 0x60, 0x20, 0x60, 0x00, 0xf3}
)
chaindb := rawdb.NewMemoryDatabase()
srcTriedb := triedb.NewDatabase(chaindb, &triedb.Config{
Preimages: true,
PathDB: pathdb.Defaults,
})
gspec := &core.Genesis{
Config: params.TestChainConfig,
BaseFee: big.NewInt(params.InitialBaseFee),
Alloc: types.GenesisAlloc{
addr1: {
Balance: big.NewInt(1000000),
Nonce: 5,
},
addr2: {
Balance: big.NewInt(2000000),
Nonce: 10,
Code: code,
Storage: map[common.Hash]common.Hash{
slotKey1: slotVal1,
slotKey2: slotVal2,
},
},
},
}
genesisBlock := gspec.MustCommit(chaindb, srcTriedb)
root := genesisBlock.Root()
t.Logf("Genesis root: %x", root)
srcTriedb.Close()
srcTriedb2 := triedb.NewDatabase(chaindb, &triedb.Config{
Preimages: true,
PathDB: &pathdb.Config{ReadOnly: true},
})
defer srcTriedb2.Close()
destTriedb := triedb.NewDatabase(chaindb, &triedb.Config{
IsVerkle: true,
PathDB: pathdb.Defaults,
})
defer destTriedb.Close()
bt, err := bintrie.NewBinaryTrie(types.EmptyBinaryHash, destTriedb)
if err != nil {
t.Fatalf("failed to create binary trie: %v", err)
}
currentRoot, err := runConversionLoop(chaindb, srcTriedb2, destTriedb, bt, root, math.MaxUint64)
if err != nil {
t.Fatalf("conversion failed: %v", err)
}
t.Logf("Binary trie root: %x", currentRoot)
bt2, err := bintrie.NewBinaryTrie(currentRoot, destTriedb)
if err != nil {
t.Fatalf("failed to reload binary trie: %v", err)
}
acc1, err := bt2.GetAccount(addr1)
if err != nil {
t.Fatalf("failed to get account1: %v", err)
}
if acc1 == nil {
t.Fatal("account1 not found in binary trie")
}
if acc1.Nonce != 5 {
t.Errorf("account1 nonce: got %d, want 5", acc1.Nonce)
}
wantBal1 := uint256.NewInt(1000000)
if acc1.Balance.Cmp(wantBal1) != 0 {
t.Errorf("account1 balance: got %s, want %s", acc1.Balance, wantBal1)
}
acc2, err := bt2.GetAccount(addr2)
if err != nil {
t.Fatalf("failed to get account2: %v", err)
}
if acc2 == nil {
t.Fatal("account2 not found in binary trie")
}
if acc2.Nonce != 10 {
t.Errorf("account2 nonce: got %d, want 10", acc2.Nonce)
}
wantBal2 := uint256.NewInt(2000000)
if acc2.Balance.Cmp(wantBal2) != 0 {
t.Errorf("account2 balance: got %s, want %s", acc2.Balance, wantBal2)
}
treeKey1 := bintrie.GetBinaryTreeKeyStorageSlot(addr2, slotKey1[:])
val1, err := bt2.GetWithHashedKey(treeKey1)
if err != nil {
t.Fatalf("failed to get storage slot1: %v", err)
}
if len(val1) == 0 {
t.Fatal("storage slot1 not found")
}
got1 := common.BytesToHash(val1)
if got1 != slotVal1 {
t.Errorf("storage slot1: got %x, want %x", got1, slotVal1)
}
treeKey2 := bintrie.GetBinaryTreeKeyStorageSlot(addr2, slotKey2[:])
val2, err := bt2.GetWithHashedKey(treeKey2)
if err != nil {
t.Fatalf("failed to get storage slot2: %v", err)
}
if len(val2) == 0 {
t.Fatal("storage slot2 not found")
}
got2 := common.BytesToHash(val2)
if got2 != slotVal2 {
t.Errorf("storage slot2: got %x, want %x", got2, slotVal2)
}
}
func TestBintrieConvertDeleteSource(t *testing.T) {
addr1 := common.HexToAddress("0x3333333333333333333333333333333333333333")
chaindb := rawdb.NewMemoryDatabase()
srcTriedb := triedb.NewDatabase(chaindb, &triedb.Config{
Preimages: true,
PathDB: pathdb.Defaults,
})
gspec := &core.Genesis{
Config: params.TestChainConfig,
BaseFee: big.NewInt(params.InitialBaseFee),
Alloc: types.GenesisAlloc{
addr1: {
Balance: big.NewInt(1000000),
},
},
}
genesisBlock := gspec.MustCommit(chaindb, srcTriedb)
root := genesisBlock.Root()
srcTriedb.Close()
srcTriedb2 := triedb.NewDatabase(chaindb, &triedb.Config{
Preimages: true,
PathDB: &pathdb.Config{ReadOnly: true},
})
destTriedb := triedb.NewDatabase(chaindb, &triedb.Config{
IsVerkle: true,
PathDB: pathdb.Defaults,
})
bt, err := bintrie.NewBinaryTrie(types.EmptyBinaryHash, destTriedb)
if err != nil {
t.Fatalf("failed to create binary trie: %v", err)
}
newRoot, err := runConversionLoop(chaindb, srcTriedb2, destTriedb, bt, root, math.MaxUint64)
if err != nil {
t.Fatalf("conversion failed: %v", err)
}
if err := deleteMPTData(chaindb, srcTriedb2, root); err != nil {
t.Fatalf("deletion failed: %v", err)
}
srcTriedb2.Close()
bt2, err := bintrie.NewBinaryTrie(newRoot, destTriedb)
if err != nil {
t.Fatalf("failed to reload binary trie after deletion: %v", err)
}
acc, err := bt2.GetAccount(addr1)
if err != nil {
t.Fatalf("failed to get account after deletion: %v", err)
}
if acc == nil {
t.Fatal("account not found after MPT deletion")
}
wantBal := uint256.NewInt(1000000)
if acc.Balance.Cmp(wantBal) != 0 {
t.Errorf("balance after deletion: got %s, want %s", acc.Balance, wantBal)
}
destTriedb.Close()
}

View file

@ -260,6 +260,8 @@ func init() {
utils.ShowDeprecated,
// See snapshot.go
snapshotCommand,
// See bintrie_convert.go
bintrieCommand,
}
if logTestCommand != nil {
app.Commands = append(app.Commands, logTestCommand)