go-ethereum/trie/triecompare/compare_test.go
weiihann d61dd875d8 nomt: add triecompare package and fix sort.SliceStable bug in stem grouping
Add trie/triecompare/ package with realistic state generation and cross-
validation tests proving NOMT produces identical roots as bintrie at scale
(10K+ accounts, PowerLaw/Uniform/Exponential distributions, multi-block).

Fix a subtle bug in groupAndHashStems: sort.Slice was used instead of
sort.SliceStable, causing non-deterministic results when the same account
is mutated twice in a single block (duplicate stem+suffix entries need
last-writer-wins ordering preserved).

Tests: 5 correctness tests + 4 benchmarks + storage footprint comparison.
All pass with race detector clean.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 00:07:04 +08:00

522 lines
14 KiB
Go

package triecompare
import (
"fmt"
"math/bits"
"os"
"path/filepath"
"testing"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/trie/bintrie"
"github.com/ethereum/go-ethereum/trie/nomttrie"
"github.com/ethereum/go-ethereum/trie/trienode"
"github.com/ethereum/go-ethereum/triedb"
"github.com/ethereum/go-ethereum/triedb/nomtdb"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// ---------------------------------------------------------------------------
// Test helpers
// ---------------------------------------------------------------------------
func newBintrie(t testing.TB) *bintrie.BinaryTrie {
t.Helper()
diskdb := rawdb.NewMemoryDatabase()
trieDB := triedb.NewDatabase(diskdb, nil)
t.Cleanup(func() { trieDB.Close() })
bt, err := bintrie.NewBinaryTrie(types.EmptyRootHash, trieDB)
require.NoError(t, err)
return bt
}
func newNomtTrieWithDir(t testing.TB, htCapacity uint64) (*nomttrie.NomtTrie, string) {
t.Helper()
diskdb := rawdb.NewMemoryDatabase()
dir := t.TempDir()
backend := nomtdb.New(diskdb, &nomtdb.Config{
DataDir: dir,
HTCapacity: htCapacity,
})
t.Cleanup(func() { backend.Close() })
nt, err := nomttrie.New(common.Hash{}, backend)
require.NoError(t, err)
return nt, dir
}
// applyOp applies a single StateOp to both bintrie and nomttrie.
func applyOp(t testing.TB, bt *bintrie.BinaryTrie, nt *nomttrie.NomtTrie, op StateOp) {
t.Helper()
switch op.Kind {
case OpUpdateAccount:
require.NoError(t, bt.UpdateAccount(op.Address, op.Account, op.CodeLen))
require.NoError(t, nt.UpdateAccount(op.Address, op.Account, op.CodeLen))
case OpUpdateStorage:
require.NoError(t, bt.UpdateStorage(op.Address, op.Slot, op.Value))
require.NoError(t, nt.UpdateStorage(op.Address, op.Slot, op.Value))
case OpUpdateCode:
require.NoError(t, bt.UpdateContractCode(op.Address, common.Hash{}, op.Code))
require.NoError(t, nt.UpdateContractCode(op.Address, common.Hash{}, op.Code))
}
}
// ---------------------------------------------------------------------------
// Test configurations
// ---------------------------------------------------------------------------
var (
smallConfig = StateGenConfig{
NumAccounts: 100,
NumContracts: 50,
MinSlots: 1,
MaxSlots: 20,
CodeSize: 128,
Distribution: PowerLaw,
Seed: 42,
}
mediumConfig = StateGenConfig{
NumAccounts: 1_000,
NumContracts: 500,
MinSlots: 1,
MaxSlots: 100,
CodeSize: 256,
Distribution: PowerLaw,
Seed: 42,
}
largeConfig = StateGenConfig{
NumAccounts: 10_000,
NumContracts: 5_000,
MinSlots: 1,
MaxSlots: 500,
CodeSize: 512,
Distribution: PowerLaw,
Seed: 42,
}
)
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
// TestRootEquality generates realistic state at various sizes and verifies
// that bintrie and NOMT produce identical state roots after each block.
func TestRootEquality(t *testing.T) {
configs := map[string]StateGenConfig{
"Small": smallConfig,
}
if !testing.Short() {
configs["Medium"] = mediumConfig
configs["Large"] = largeConfig
}
for name, cfg := range configs {
t.Run(name, func(t *testing.T) {
blocks := GenerateBlocks(cfg)
htCap := estimateHTCapacity(cfg.NumAccounts, cfg.NumContracts, (cfg.MinSlots+cfg.MaxSlots)/2)
bt := newBintrie(t)
nt, _ := newNomtTrieWithDir(t, htCap)
for blockIdx, ops := range blocks {
for _, op := range ops {
applyOp(t, bt, nt, op)
}
binRoot := bt.Hash()
nomtRoot := nt.Hash()
t.Logf("block %d: %d ops, bintrie=%x nomt=%x",
blockIdx, len(ops), binRoot[:8], nomtRoot[:8])
assert.NotEqual(t, common.Hash{}, binRoot,
"bintrie root should be non-zero at block %d", blockIdx)
assert.Equal(t, binRoot, nomtRoot,
"root mismatch at block %d", blockIdx)
}
})
}
}
// TestDeterminism runs the same seed twice and verifies identical roots.
func TestDeterminism(t *testing.T) {
computeRoot := func() common.Hash {
blocks := GenerateBlocks(smallConfig)
htCap := estimateHTCapacity(
smallConfig.NumAccounts, smallConfig.NumContracts,
(smallConfig.MinSlots+smallConfig.MaxSlots)/2,
)
nt, _ := newNomtTrieWithDir(t, htCap)
bt := newBintrie(t)
var root common.Hash
for _, ops := range blocks {
for _, op := range ops {
applyOp(t, bt, nt, op)
}
root = nt.Hash()
bt.Hash() // flush bintrie too
}
return root
}
root1 := computeRoot()
root2 := computeRoot()
assert.Equal(t, root1, root2, "same seed must produce same root")
}
// TestDistributionVariants runs Small config with each distribution type
// and verifies matching roots for all variants.
func TestDistributionVariants(t *testing.T) {
distributions := []struct {
name string
dist Distribution
}{
{"PowerLaw", PowerLaw},
{"Uniform", Uniform},
{"Exponential", Exponential},
}
for _, d := range distributions {
t.Run(d.name, func(t *testing.T) {
cfg := smallConfig
cfg.Distribution = d.dist
cfg.Seed = 123 // same seed for all
blocks := GenerateBlocks(cfg)
htCap := estimateHTCapacity(cfg.NumAccounts, cfg.NumContracts, (cfg.MinSlots+cfg.MaxSlots)/2)
bt := newBintrie(t)
nt, _ := newNomtTrieWithDir(t, htCap)
var binRoot, nomtRoot common.Hash
for _, ops := range blocks {
for _, op := range ops {
applyOp(t, bt, nt, op)
}
binRoot = bt.Hash()
nomtRoot = nt.Hash()
}
t.Logf("dist=%s bintrie=%x nomt=%x", d.name, binRoot[:8], nomtRoot[:8])
assert.Equal(t, binRoot, nomtRoot,
"root mismatch with %s distribution", d.name)
})
}
}
// TestIncrementalRootEquality hashes after every single operation in the
// first block, catching ordering-sensitive bugs.
func TestIncrementalRootEquality(t *testing.T) {
if testing.Short() {
t.Skip("incremental test is slow")
}
// Use a smaller config to keep hash-per-op feasible.
cfg := StateGenConfig{
NumAccounts: 20,
NumContracts: 10,
MinSlots: 1,
MaxSlots: 5,
CodeSize: 64,
Distribution: Uniform,
Seed: 99,
}
blocks := GenerateBlocks(cfg)
htCap := estimateHTCapacity(cfg.NumAccounts, cfg.NumContracts, 3)
bt := newBintrie(t)
nt, _ := newNomtTrieWithDir(t, htCap)
for i, op := range blocks[0] {
applyOp(t, bt, nt, op)
binRoot := bt.Hash()
nomtRoot := nt.Hash()
if binRoot != nomtRoot {
t.Fatalf("root mismatch at op %d (kind=%d addr=%x): bin=%x nomt=%x",
i, op.Kind, op.Address[:4], binRoot[:8], nomtRoot[:8])
}
}
t.Logf("verified %d incremental hashes match", len(blocks[0]))
}
// TestStorageFootprint populates state and measures storage used by each
// implementation. Logs sizes and ratio.
func TestStorageFootprint(t *testing.T) {
if testing.Short() {
t.Skip("storage footprint test requires medium config")
}
cfg := mediumConfig
blocks := GenerateBlocks(cfg)
htCap := estimateHTCapacity(cfg.NumAccounts, cfg.NumContracts, (cfg.MinSlots+cfg.MaxSlots)/2)
bt := newBintrie(t)
nt, nomtDir := newNomtTrieWithDir(t, htCap)
for _, ops := range blocks {
for _, op := range ops {
applyOp(t, bt, nt, op)
}
}
// Force both implementations to finalize.
binRoot := bt.Hash()
nomtRoot := nt.Hash()
require.Equal(t, binRoot, nomtRoot, "roots must match before measuring storage")
// Bintrie: sum serialized node blobs from Commit.
_, ns := bt.Commit(false)
binBytes := nodesetBytes(ns)
// NOMT: sum file sizes on disk.
nomtBytes := dirSize(t, nomtDir)
ratio := float64(nomtBytes) / float64(max(binBytes, 1))
t.Logf("bintrie serialized nodes: %s (%d bytes)", humanBytes(binBytes), binBytes)
t.Logf("NOMT bitbox on disk: %s (%d bytes)", humanBytes(nomtBytes), nomtBytes)
t.Logf("NOMT / bintrie ratio: %.2fx", ratio)
}
// ---------------------------------------------------------------------------
// Benchmarks
// ---------------------------------------------------------------------------
func BenchmarkUpdateAccount(b *testing.B) {
cfg := smallConfig
blocks := GenerateBlocks(cfg)
ops := filterOps(blocks[0], OpUpdateAccount)
htCap := estimateHTCapacity(cfg.NumAccounts, cfg.NumContracts, 10)
b.Run("bintrie", func(b *testing.B) {
bt := newBintrie(b)
b.ResetTimer()
for i := range b.N {
op := ops[i%len(ops)]
_ = bt.UpdateAccount(op.Address, op.Account, op.CodeLen)
}
})
b.Run("nomt", func(b *testing.B) {
nt, _ := newNomtTrieWithDir(b, htCap)
b.ResetTimer()
for i := range b.N {
op := ops[i%len(ops)]
_ = nt.UpdateAccount(op.Address, op.Account, op.CodeLen)
}
})
}
func BenchmarkUpdateStorage(b *testing.B) {
cfg := smallConfig
blocks := GenerateBlocks(cfg)
ops := filterOps(blocks[0], OpUpdateStorage)
htCap := estimateHTCapacity(cfg.NumAccounts, cfg.NumContracts, 10)
b.Run("bintrie", func(b *testing.B) {
bt := newBintrie(b)
b.ResetTimer()
for i := range b.N {
op := ops[i%len(ops)]
_ = bt.UpdateStorage(op.Address, op.Slot, op.Value)
}
})
b.Run("nomt", func(b *testing.B) {
nt, _ := newNomtTrieWithDir(b, htCap)
b.ResetTimer()
for i := range b.N {
op := ops[i%len(ops)]
_ = nt.UpdateStorage(op.Address, op.Slot, op.Value)
}
})
}
func BenchmarkHash(b *testing.B) {
for _, size := range []int{100, 1000, 10000} {
b.Run(fmt.Sprintf("size=%d", size), func(b *testing.B) {
cfg := StateGenConfig{
NumAccounts: size,
NumContracts: 0,
MinSlots: 0,
MaxSlots: 0,
CodeSize: 0,
Distribution: Uniform,
Seed: 77,
}
blocks := GenerateBlocks(cfg)
htCap := estimateHTCapacity(size, 0, 0)
b.Run("bintrie", func(b *testing.B) {
bt := newBintrie(b)
for _, op := range blocks[0] {
_ = bt.UpdateAccount(op.Address, op.Account, op.CodeLen)
}
bt.Hash() // baseline
b.ResetTimer()
for range b.N {
// Modify one account to dirty the trie.
op := blocks[0][0]
op.Account.Nonce++
_ = bt.UpdateAccount(op.Address, op.Account, op.CodeLen)
bt.Hash()
}
})
b.Run("nomt", func(b *testing.B) {
nt, _ := newNomtTrieWithDir(b, htCap)
for _, op := range blocks[0] {
_ = nt.UpdateAccount(op.Address, op.Account, op.CodeLen)
}
nt.Hash() // baseline
b.ResetTimer()
for range b.N {
op := blocks[0][0]
op.Account.Nonce++
_ = nt.UpdateAccount(op.Address, op.Account, op.CodeLen)
nt.Hash()
}
})
})
}
}
func BenchmarkBlockWorkload(b *testing.B) {
cfg := smallConfig
blocks := GenerateBlocks(cfg)
htCap := estimateHTCapacity(cfg.NumAccounts, cfg.NumContracts, 10)
// Use block 1 (mutations) as the repeated workload.
workload := blocks[1]
b.Run("bintrie", func(b *testing.B) {
bt := newBintrie(b)
// Apply initial state.
for _, op := range blocks[0] {
applyOpSingle(b, bt, op)
}
bt.Hash()
b.ResetTimer()
for range b.N {
for _, op := range workload {
applyOpSingle(b, bt, op)
}
bt.Hash()
}
})
b.Run("nomt", func(b *testing.B) {
nt, _ := newNomtTrieWithDir(b, htCap)
for _, op := range blocks[0] {
applyOpSingleNomt(b, nt, op)
}
nt.Hash()
b.ResetTimer()
for range b.N {
for _, op := range workload {
applyOpSingleNomt(b, nt, op)
}
nt.Hash()
}
})
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
// applyOpSingle applies a StateOp to a bintrie only (for benchmarks).
func applyOpSingle(t testing.TB, bt *bintrie.BinaryTrie, op StateOp) {
t.Helper()
switch op.Kind {
case OpUpdateAccount:
_ = bt.UpdateAccount(op.Address, op.Account, op.CodeLen)
case OpUpdateStorage:
_ = bt.UpdateStorage(op.Address, op.Slot, op.Value)
case OpUpdateCode:
_ = bt.UpdateContractCode(op.Address, common.Hash{}, op.Code)
}
}
// applyOpSingleNomt applies a StateOp to a NomtTrie only (for benchmarks).
func applyOpSingleNomt(t testing.TB, nt *nomttrie.NomtTrie, op StateOp) {
t.Helper()
switch op.Kind {
case OpUpdateAccount:
_ = nt.UpdateAccount(op.Address, op.Account, op.CodeLen)
case OpUpdateStorage:
_ = nt.UpdateStorage(op.Address, op.Slot, op.Value)
case OpUpdateCode:
_ = nt.UpdateContractCode(op.Address, common.Hash{}, op.Code)
}
}
// filterOps returns only operations of the given kind.
func filterOps(ops []StateOp, kind OpKind) []StateOp {
var out []StateOp
for i := range ops {
if ops[i].Kind == kind {
out = append(out, ops[i])
}
}
return out
}
// nodesetBytes sums the serialized blob sizes from a bintrie NodeSet.
func nodesetBytes(ns *trienode.NodeSet) int64 {
if ns == nil {
return 0
}
var total int64
for _, node := range ns.Nodes {
total += int64(len(node.Blob))
}
return total
}
// dirSize walks a directory and returns total file size in bytes.
func dirSize(t testing.TB, dir string) int64 {
t.Helper()
var total int64
err := filepath.Walk(dir, func(_ string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if !info.IsDir() {
total += info.Size()
}
return nil
})
require.NoError(t, err)
return total
}
// estimateHTCapacity returns a power-of-2 hash table capacity for ~50% load.
// Each account uses ~1 stem; each contract uses 1 + ceil(avgSlots/256) stems.
func estimateHTCapacity(numAccounts, numContracts, avgSlots int) uint64 {
stems := numAccounts + numContracts
if avgSlots > 0 {
stems += numContracts * ((avgSlots + 255) / 256)
}
// 50% load factor → double the stem count, then round up to power of 2.
target := max(uint64(stems*2), 64)
return 1 << bits.Len64(target-1)
}
// humanBytes formats byte counts for log output.
func humanBytes(b int64) string {
switch {
case b >= 1<<20:
return fmt.Sprintf("%.1f MiB", float64(b)/(1<<20))
case b >= 1<<10:
return fmt.Sprintf("%.1f KiB", float64(b)/(1<<10))
default:
return fmt.Sprintf("%d B", b)
}
}