mirror of
https://github.com/ethereum/go-ethereum.git
synced 2026-06-19 21:31:37 +00:00
nomt: add triecompare package and fix sort.SliceStable bug in stem grouping
Add trie/triecompare/ package with realistic state generation and cross-validation tests proving that NOMT produces the same roots as bintrie at scale (10K+ accounts, PowerLaw/Uniform/Exponential distributions, multi-block). Fix a subtle bug in groupAndHashStems: sort.Slice was used instead of sort.SliceStable, causing non-deterministic results when the same account is mutated twice in a single block (duplicate stem+suffix entries need their insertion order preserved so that the last writer wins). Tests: 5 correctness tests + 4 benchmarks + storage footprint comparison. All pass with race detector clean. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
4a2a10ca7d
commit
d61dd875d8
3 changed files with 788 additions and 2 deletions
|
|
@ -86,8 +86,9 @@ func groupAndHashStems(
|
|||
return nil, nil
|
||||
}
|
||||
|
||||
// Sort by stem, then suffix.
|
||||
sort.Slice(updates, func(i, j int) bool {
|
||||
// Stable sort by stem then suffix to preserve insertion order for
|
||||
// duplicate (stem, suffix) pairs — the last queued value must win.
|
||||
sort.SliceStable(updates, func(i, j int) bool {
|
||||
if updates[i].Stem != updates[j].Stem {
|
||||
return stemLess(&updates[i].Stem, &updates[j].Stem)
|
||||
}
|
||||
|
|
|
|||
522
trie/triecompare/compare_test.go
Normal file
522
trie/triecompare/compare_test.go
Normal file
|
|
@ -0,0 +1,522 @@
|
|||
package triecompare
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/bits"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/ethereum/go-ethereum/common"
|
||||
"github.com/ethereum/go-ethereum/core/rawdb"
|
||||
"github.com/ethereum/go-ethereum/core/types"
|
||||
"github.com/ethereum/go-ethereum/trie/bintrie"
|
||||
"github.com/ethereum/go-ethereum/trie/nomttrie"
|
||||
"github.com/ethereum/go-ethereum/trie/trienode"
|
||||
"github.com/ethereum/go-ethereum/triedb"
|
||||
"github.com/ethereum/go-ethereum/triedb/nomtdb"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func newBintrie(t testing.TB) *bintrie.BinaryTrie {
|
||||
t.Helper()
|
||||
diskdb := rawdb.NewMemoryDatabase()
|
||||
trieDB := triedb.NewDatabase(diskdb, nil)
|
||||
t.Cleanup(func() { trieDB.Close() })
|
||||
bt, err := bintrie.NewBinaryTrie(types.EmptyRootHash, trieDB)
|
||||
require.NoError(t, err)
|
||||
return bt
|
||||
}
|
||||
|
||||
func newNomtTrieWithDir(t testing.TB, htCapacity uint64) (*nomttrie.NomtTrie, string) {
|
||||
t.Helper()
|
||||
diskdb := rawdb.NewMemoryDatabase()
|
||||
dir := t.TempDir()
|
||||
backend := nomtdb.New(diskdb, &nomtdb.Config{
|
||||
DataDir: dir,
|
||||
HTCapacity: htCapacity,
|
||||
})
|
||||
t.Cleanup(func() { backend.Close() })
|
||||
|
||||
nt, err := nomttrie.New(common.Hash{}, backend)
|
||||
require.NoError(t, err)
|
||||
return nt, dir
|
||||
}
|
||||
|
||||
// applyOp applies a single StateOp to both bintrie and nomttrie.
|
||||
func applyOp(t testing.TB, bt *bintrie.BinaryTrie, nt *nomttrie.NomtTrie, op StateOp) {
|
||||
t.Helper()
|
||||
switch op.Kind {
|
||||
case OpUpdateAccount:
|
||||
require.NoError(t, bt.UpdateAccount(op.Address, op.Account, op.CodeLen))
|
||||
require.NoError(t, nt.UpdateAccount(op.Address, op.Account, op.CodeLen))
|
||||
case OpUpdateStorage:
|
||||
require.NoError(t, bt.UpdateStorage(op.Address, op.Slot, op.Value))
|
||||
require.NoError(t, nt.UpdateStorage(op.Address, op.Slot, op.Value))
|
||||
case OpUpdateCode:
|
||||
require.NoError(t, bt.UpdateContractCode(op.Address, common.Hash{}, op.Code))
|
||||
require.NoError(t, nt.UpdateContractCode(op.Address, common.Hash{}, op.Code))
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test configurations
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
var (
|
||||
smallConfig = StateGenConfig{
|
||||
NumAccounts: 100,
|
||||
NumContracts: 50,
|
||||
MinSlots: 1,
|
||||
MaxSlots: 20,
|
||||
CodeSize: 128,
|
||||
Distribution: PowerLaw,
|
||||
Seed: 42,
|
||||
}
|
||||
mediumConfig = StateGenConfig{
|
||||
NumAccounts: 1_000,
|
||||
NumContracts: 500,
|
||||
MinSlots: 1,
|
||||
MaxSlots: 100,
|
||||
CodeSize: 256,
|
||||
Distribution: PowerLaw,
|
||||
Seed: 42,
|
||||
}
|
||||
largeConfig = StateGenConfig{
|
||||
NumAccounts: 10_000,
|
||||
NumContracts: 5_000,
|
||||
MinSlots: 1,
|
||||
MaxSlots: 500,
|
||||
CodeSize: 512,
|
||||
Distribution: PowerLaw,
|
||||
Seed: 42,
|
||||
}
|
||||
)
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// TestRootEquality generates realistic state at various sizes and verifies
|
||||
// that bintrie and NOMT produce identical state roots after each block.
|
||||
func TestRootEquality(t *testing.T) {
|
||||
configs := map[string]StateGenConfig{
|
||||
"Small": smallConfig,
|
||||
}
|
||||
if !testing.Short() {
|
||||
configs["Medium"] = mediumConfig
|
||||
configs["Large"] = largeConfig
|
||||
}
|
||||
|
||||
for name, cfg := range configs {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
blocks := GenerateBlocks(cfg)
|
||||
htCap := estimateHTCapacity(cfg.NumAccounts, cfg.NumContracts, (cfg.MinSlots+cfg.MaxSlots)/2)
|
||||
|
||||
bt := newBintrie(t)
|
||||
nt, _ := newNomtTrieWithDir(t, htCap)
|
||||
|
||||
for blockIdx, ops := range blocks {
|
||||
for _, op := range ops {
|
||||
applyOp(t, bt, nt, op)
|
||||
}
|
||||
binRoot := bt.Hash()
|
||||
nomtRoot := nt.Hash()
|
||||
|
||||
t.Logf("block %d: %d ops, bintrie=%x nomt=%x",
|
||||
blockIdx, len(ops), binRoot[:8], nomtRoot[:8])
|
||||
|
||||
assert.NotEqual(t, common.Hash{}, binRoot,
|
||||
"bintrie root should be non-zero at block %d", blockIdx)
|
||||
assert.Equal(t, binRoot, nomtRoot,
|
||||
"root mismatch at block %d", blockIdx)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestDeterminism runs the same seed twice and verifies identical roots.
|
||||
func TestDeterminism(t *testing.T) {
|
||||
computeRoot := func() common.Hash {
|
||||
blocks := GenerateBlocks(smallConfig)
|
||||
htCap := estimateHTCapacity(
|
||||
smallConfig.NumAccounts, smallConfig.NumContracts,
|
||||
(smallConfig.MinSlots+smallConfig.MaxSlots)/2,
|
||||
)
|
||||
nt, _ := newNomtTrieWithDir(t, htCap)
|
||||
bt := newBintrie(t)
|
||||
var root common.Hash
|
||||
for _, ops := range blocks {
|
||||
for _, op := range ops {
|
||||
applyOp(t, bt, nt, op)
|
||||
}
|
||||
root = nt.Hash()
|
||||
bt.Hash() // flush bintrie too
|
||||
}
|
||||
return root
|
||||
}
|
||||
|
||||
root1 := computeRoot()
|
||||
root2 := computeRoot()
|
||||
assert.Equal(t, root1, root2, "same seed must produce same root")
|
||||
}
|
||||
|
||||
// TestDistributionVariants runs Small config with each distribution type
|
||||
// and verifies matching roots for all variants.
|
||||
func TestDistributionVariants(t *testing.T) {
|
||||
distributions := []struct {
|
||||
name string
|
||||
dist Distribution
|
||||
}{
|
||||
{"PowerLaw", PowerLaw},
|
||||
{"Uniform", Uniform},
|
||||
{"Exponential", Exponential},
|
||||
}
|
||||
|
||||
for _, d := range distributions {
|
||||
t.Run(d.name, func(t *testing.T) {
|
||||
cfg := smallConfig
|
||||
cfg.Distribution = d.dist
|
||||
cfg.Seed = 123 // same seed for all
|
||||
|
||||
blocks := GenerateBlocks(cfg)
|
||||
htCap := estimateHTCapacity(cfg.NumAccounts, cfg.NumContracts, (cfg.MinSlots+cfg.MaxSlots)/2)
|
||||
|
||||
bt := newBintrie(t)
|
||||
nt, _ := newNomtTrieWithDir(t, htCap)
|
||||
|
||||
var binRoot, nomtRoot common.Hash
|
||||
for _, ops := range blocks {
|
||||
for _, op := range ops {
|
||||
applyOp(t, bt, nt, op)
|
||||
}
|
||||
binRoot = bt.Hash()
|
||||
nomtRoot = nt.Hash()
|
||||
}
|
||||
|
||||
t.Logf("dist=%s bintrie=%x nomt=%x", d.name, binRoot[:8], nomtRoot[:8])
|
||||
assert.Equal(t, binRoot, nomtRoot,
|
||||
"root mismatch with %s distribution", d.name)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestIncrementalRootEquality hashes after every single operation in the
|
||||
// first block, catching ordering-sensitive bugs.
|
||||
func TestIncrementalRootEquality(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("incremental test is slow")
|
||||
}
|
||||
|
||||
// Use a smaller config to keep hash-per-op feasible.
|
||||
cfg := StateGenConfig{
|
||||
NumAccounts: 20,
|
||||
NumContracts: 10,
|
||||
MinSlots: 1,
|
||||
MaxSlots: 5,
|
||||
CodeSize: 64,
|
||||
Distribution: Uniform,
|
||||
Seed: 99,
|
||||
}
|
||||
blocks := GenerateBlocks(cfg)
|
||||
htCap := estimateHTCapacity(cfg.NumAccounts, cfg.NumContracts, 3)
|
||||
|
||||
bt := newBintrie(t)
|
||||
nt, _ := newNomtTrieWithDir(t, htCap)
|
||||
|
||||
for i, op := range blocks[0] {
|
||||
applyOp(t, bt, nt, op)
|
||||
binRoot := bt.Hash()
|
||||
nomtRoot := nt.Hash()
|
||||
|
||||
if binRoot != nomtRoot {
|
||||
t.Fatalf("root mismatch at op %d (kind=%d addr=%x): bin=%x nomt=%x",
|
||||
i, op.Kind, op.Address[:4], binRoot[:8], nomtRoot[:8])
|
||||
}
|
||||
}
|
||||
t.Logf("verified %d incremental hashes match", len(blocks[0]))
|
||||
}
|
||||
|
||||
// TestStorageFootprint populates state and measures storage used by each
|
||||
// implementation. Logs sizes and ratio.
|
||||
func TestStorageFootprint(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("storage footprint test requires medium config")
|
||||
}
|
||||
|
||||
cfg := mediumConfig
|
||||
blocks := GenerateBlocks(cfg)
|
||||
htCap := estimateHTCapacity(cfg.NumAccounts, cfg.NumContracts, (cfg.MinSlots+cfg.MaxSlots)/2)
|
||||
|
||||
bt := newBintrie(t)
|
||||
nt, nomtDir := newNomtTrieWithDir(t, htCap)
|
||||
|
||||
for _, ops := range blocks {
|
||||
for _, op := range ops {
|
||||
applyOp(t, bt, nt, op)
|
||||
}
|
||||
}
|
||||
|
||||
// Force both implementations to finalize.
|
||||
binRoot := bt.Hash()
|
||||
nomtRoot := nt.Hash()
|
||||
require.Equal(t, binRoot, nomtRoot, "roots must match before measuring storage")
|
||||
|
||||
// Bintrie: sum serialized node blobs from Commit.
|
||||
_, ns := bt.Commit(false)
|
||||
binBytes := nodesetBytes(ns)
|
||||
|
||||
// NOMT: sum file sizes on disk.
|
||||
nomtBytes := dirSize(t, nomtDir)
|
||||
|
||||
ratio := float64(nomtBytes) / float64(max(binBytes, 1))
|
||||
t.Logf("bintrie serialized nodes: %s (%d bytes)", humanBytes(binBytes), binBytes)
|
||||
t.Logf("NOMT bitbox on disk: %s (%d bytes)", humanBytes(nomtBytes), nomtBytes)
|
||||
t.Logf("NOMT / bintrie ratio: %.2fx", ratio)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Benchmarks
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func BenchmarkUpdateAccount(b *testing.B) {
|
||||
cfg := smallConfig
|
||||
blocks := GenerateBlocks(cfg)
|
||||
ops := filterOps(blocks[0], OpUpdateAccount)
|
||||
htCap := estimateHTCapacity(cfg.NumAccounts, cfg.NumContracts, 10)
|
||||
|
||||
b.Run("bintrie", func(b *testing.B) {
|
||||
bt := newBintrie(b)
|
||||
b.ResetTimer()
|
||||
for i := range b.N {
|
||||
op := ops[i%len(ops)]
|
||||
_ = bt.UpdateAccount(op.Address, op.Account, op.CodeLen)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("nomt", func(b *testing.B) {
|
||||
nt, _ := newNomtTrieWithDir(b, htCap)
|
||||
b.ResetTimer()
|
||||
for i := range b.N {
|
||||
op := ops[i%len(ops)]
|
||||
_ = nt.UpdateAccount(op.Address, op.Account, op.CodeLen)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkUpdateStorage(b *testing.B) {
|
||||
cfg := smallConfig
|
||||
blocks := GenerateBlocks(cfg)
|
||||
ops := filterOps(blocks[0], OpUpdateStorage)
|
||||
htCap := estimateHTCapacity(cfg.NumAccounts, cfg.NumContracts, 10)
|
||||
|
||||
b.Run("bintrie", func(b *testing.B) {
|
||||
bt := newBintrie(b)
|
||||
b.ResetTimer()
|
||||
for i := range b.N {
|
||||
op := ops[i%len(ops)]
|
||||
_ = bt.UpdateStorage(op.Address, op.Slot, op.Value)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("nomt", func(b *testing.B) {
|
||||
nt, _ := newNomtTrieWithDir(b, htCap)
|
||||
b.ResetTimer()
|
||||
for i := range b.N {
|
||||
op := ops[i%len(ops)]
|
||||
_ = nt.UpdateStorage(op.Address, op.Slot, op.Value)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkHash(b *testing.B) {
|
||||
for _, size := range []int{100, 1000, 10000} {
|
||||
b.Run(fmt.Sprintf("size=%d", size), func(b *testing.B) {
|
||||
cfg := StateGenConfig{
|
||||
NumAccounts: size,
|
||||
NumContracts: 0,
|
||||
MinSlots: 0,
|
||||
MaxSlots: 0,
|
||||
CodeSize: 0,
|
||||
Distribution: Uniform,
|
||||
Seed: 77,
|
||||
}
|
||||
blocks := GenerateBlocks(cfg)
|
||||
htCap := estimateHTCapacity(size, 0, 0)
|
||||
|
||||
b.Run("bintrie", func(b *testing.B) {
|
||||
bt := newBintrie(b)
|
||||
for _, op := range blocks[0] {
|
||||
_ = bt.UpdateAccount(op.Address, op.Account, op.CodeLen)
|
||||
}
|
||||
bt.Hash() // baseline
|
||||
|
||||
b.ResetTimer()
|
||||
for range b.N {
|
||||
// Modify one account to dirty the trie.
|
||||
op := blocks[0][0]
|
||||
op.Account.Nonce++
|
||||
_ = bt.UpdateAccount(op.Address, op.Account, op.CodeLen)
|
||||
bt.Hash()
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("nomt", func(b *testing.B) {
|
||||
nt, _ := newNomtTrieWithDir(b, htCap)
|
||||
for _, op := range blocks[0] {
|
||||
_ = nt.UpdateAccount(op.Address, op.Account, op.CodeLen)
|
||||
}
|
||||
nt.Hash() // baseline
|
||||
|
||||
b.ResetTimer()
|
||||
for range b.N {
|
||||
op := blocks[0][0]
|
||||
op.Account.Nonce++
|
||||
_ = nt.UpdateAccount(op.Address, op.Account, op.CodeLen)
|
||||
nt.Hash()
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkBlockWorkload(b *testing.B) {
|
||||
cfg := smallConfig
|
||||
blocks := GenerateBlocks(cfg)
|
||||
htCap := estimateHTCapacity(cfg.NumAccounts, cfg.NumContracts, 10)
|
||||
|
||||
// Use block 1 (mutations) as the repeated workload.
|
||||
workload := blocks[1]
|
||||
|
||||
b.Run("bintrie", func(b *testing.B) {
|
||||
bt := newBintrie(b)
|
||||
// Apply initial state.
|
||||
for _, op := range blocks[0] {
|
||||
applyOpSingle(b, bt, op)
|
||||
}
|
||||
bt.Hash()
|
||||
|
||||
b.ResetTimer()
|
||||
for range b.N {
|
||||
for _, op := range workload {
|
||||
applyOpSingle(b, bt, op)
|
||||
}
|
||||
bt.Hash()
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("nomt", func(b *testing.B) {
|
||||
nt, _ := newNomtTrieWithDir(b, htCap)
|
||||
for _, op := range blocks[0] {
|
||||
applyOpSingleNomt(b, nt, op)
|
||||
}
|
||||
nt.Hash()
|
||||
|
||||
b.ResetTimer()
|
||||
for range b.N {
|
||||
for _, op := range workload {
|
||||
applyOpSingleNomt(b, nt, op)
|
||||
}
|
||||
nt.Hash()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// applyOpSingle applies a StateOp to a bintrie only (for benchmarks).
|
||||
func applyOpSingle(t testing.TB, bt *bintrie.BinaryTrie, op StateOp) {
|
||||
t.Helper()
|
||||
switch op.Kind {
|
||||
case OpUpdateAccount:
|
||||
_ = bt.UpdateAccount(op.Address, op.Account, op.CodeLen)
|
||||
case OpUpdateStorage:
|
||||
_ = bt.UpdateStorage(op.Address, op.Slot, op.Value)
|
||||
case OpUpdateCode:
|
||||
_ = bt.UpdateContractCode(op.Address, common.Hash{}, op.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// applyOpSingleNomt applies a StateOp to a NomtTrie only (for benchmarks).
|
||||
func applyOpSingleNomt(t testing.TB, nt *nomttrie.NomtTrie, op StateOp) {
|
||||
t.Helper()
|
||||
switch op.Kind {
|
||||
case OpUpdateAccount:
|
||||
_ = nt.UpdateAccount(op.Address, op.Account, op.CodeLen)
|
||||
case OpUpdateStorage:
|
||||
_ = nt.UpdateStorage(op.Address, op.Slot, op.Value)
|
||||
case OpUpdateCode:
|
||||
_ = nt.UpdateContractCode(op.Address, common.Hash{}, op.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// filterOps returns only operations of the given kind.
|
||||
func filterOps(ops []StateOp, kind OpKind) []StateOp {
|
||||
var out []StateOp
|
||||
for i := range ops {
|
||||
if ops[i].Kind == kind {
|
||||
out = append(out, ops[i])
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// nodesetBytes sums the serialized blob sizes from a bintrie NodeSet.
|
||||
func nodesetBytes(ns *trienode.NodeSet) int64 {
|
||||
if ns == nil {
|
||||
return 0
|
||||
}
|
||||
var total int64
|
||||
for _, node := range ns.Nodes {
|
||||
total += int64(len(node.Blob))
|
||||
}
|
||||
return total
|
||||
}
|
||||
|
||||
// dirSize walks a directory and returns total file size in bytes.
|
||||
func dirSize(t testing.TB, dir string) int64 {
|
||||
t.Helper()
|
||||
var total int64
|
||||
err := filepath.Walk(dir, func(_ string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !info.IsDir() {
|
||||
total += info.Size()
|
||||
}
|
||||
return nil
|
||||
})
|
||||
require.NoError(t, err)
|
||||
return total
|
||||
}
|
||||
|
||||
// estimateHTCapacity returns a power-of-two hash table capacity sized for
// roughly 50% load.
//
// The stem estimate is one per account, one per contract, and a further
// ceil(avgSlots/256) per contract when avgSlots is positive. That count is
// doubled, raised to at least 64, and rounded up to the next power of two.
func estimateHTCapacity(numAccounts, numContracts, avgSlots int) uint64 {
	const minCapacity = 64

	stemsPerContract := 1
	if avgSlots > 0 {
		stemsPerContract += (avgSlots + 255) / 256
	}
	stems := numAccounts + numContracts*stemsPerContract

	// Doubling the stem count gives the 50% load factor.
	want := uint64(stems * 2)
	if want < minCapacity {
		want = minCapacity
	}
	// bits.Len64(want-1) is the exponent of the smallest power of two >= want.
	return uint64(1) << bits.Len64(want-1)
}
|
||||
|
||||
// humanBytes renders a byte count for log output: values of at least 1 MiB
// as "%.1f MiB", values of at least 1 KiB as "%.1f KiB", and anything
// smaller (including negative values) as a plain "%d B".
func humanBytes(b int64) string {
	const (
		kib = 1 << 10
		mib = 1 << 20
	)
	if b >= mib {
		return fmt.Sprintf("%.1f MiB", float64(b)/mib)
	}
	if b >= kib {
		return fmt.Sprintf("%.1f KiB", float64(b)/kib)
	}
	return fmt.Sprintf("%d B", b)
}
|
||||
263
trie/triecompare/stategen.go
Normal file
263
trie/triecompare/stategen.go
Normal file
|
|
@ -0,0 +1,263 @@
|
|||
// Package triecompare provides realistic Ethereum state generation and
|
||||
// comparison tests between bintrie and NOMT trie implementations.
|
||||
//
|
||||
// The state generation logic is ported from the state-actor repository's
|
||||
// generator patterns, using PowerLaw/Uniform/Exponential distributions
|
||||
// to mimic mainnet-like storage slot distributions.
|
||||
package triecompare
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"math"
|
||||
mrand "math/rand"
|
||||
"sort"
|
||||
|
||||
"github.com/ethereum/go-ethereum/common"
|
||||
"github.com/ethereum/go-ethereum/core/types"
|
||||
"github.com/holiman/uint256"
|
||||
)
|
||||
|
||||
// Distribution selects how the number of storage slots per contract is drawn.
type Distribution int

// Supported slot distributions. PowerLaw is the zero value.
const (
	// PowerLaw gives most contracts few slots and a few contracts many,
	// sampled through a Pareto inverse CDF with alpha = 1.5. It is meant to
	// resemble real Ethereum, where a handful of contracts hold most of the
	// storage.
	PowerLaw Distribution = iota

	// Uniform draws every contract's slot count evenly from the configured
	// range.
	Uniform

	// Exponential draws slot counts with exponential decay.
	Exponential
)
|
||||
|
||||
// StateGenConfig configures synthetic state generation.
|
||||
type StateGenConfig struct {
|
||||
NumAccounts int // Number of EOA accounts
|
||||
NumContracts int // Number of contract accounts
|
||||
MinSlots int // Minimum storage slots per contract
|
||||
MaxSlots int // Maximum storage slots per contract
|
||||
CodeSize int // Average contract code size in bytes
|
||||
Distribution Distribution // Slot distribution strategy
|
||||
Seed int64 // Deterministic random seed
|
||||
}
|
||||
|
||||
// OpKind identifies which trie update a StateOp describes.
type OpKind int

// Supported operation kinds.
const (
	// OpUpdateAccount writes an account (with its code length).
	OpUpdateAccount OpKind = iota
	// OpUpdateStorage writes one storage slot.
	OpUpdateStorage
	// OpUpdateCode writes contract code.
	OpUpdateCode
)
|
||||
|
||||
// StateOp represents a single state operation to apply to a trie.
|
||||
type StateOp struct {
|
||||
Kind OpKind
|
||||
Address common.Address
|
||||
Account *types.StateAccount // populated for OpUpdateAccount
|
||||
CodeLen int // code length for OpUpdateAccount
|
||||
Code []byte // populated for OpUpdateCode
|
||||
Slot []byte // 32-byte key for OpUpdateStorage
|
||||
Value []byte // raw value for OpUpdateStorage
|
||||
}
|
||||
|
||||
// GenerateBlocks produces deterministic blocks of state operations.
|
||||
// Block 0 = initial state creation (all accounts, storage, code).
|
||||
// Block 1 = incremental mutations (nonce bumps, balance changes, storage mods).
|
||||
func GenerateBlocks(cfg StateGenConfig) [][]StateOp {
|
||||
rng := mrand.New(mrand.NewSource(cfg.Seed))
|
||||
|
||||
// Block 0: initial state.
|
||||
block0 := generateInitialState(rng, cfg)
|
||||
|
||||
// Block 1: incremental mutations on a subset of addresses.
|
||||
block1 := generateMutations(rng, cfg, block0)
|
||||
|
||||
return [][]StateOp{block0, block1}
|
||||
}
|
||||
|
||||
// generateInitialState creates the full initial state: EOAs, contracts
|
||||
// with storage and code.
|
||||
func generateInitialState(rng *mrand.Rand, cfg StateGenConfig) []StateOp {
|
||||
estimatedOps := cfg.NumAccounts*1 + cfg.NumContracts*3
|
||||
ops := make([]StateOp, 0, estimatedOps)
|
||||
|
||||
emptyCodeHash := common.HexToHash(
|
||||
"c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470",
|
||||
)
|
||||
|
||||
// EOA accounts.
|
||||
for range cfg.NumAccounts {
|
||||
var addr common.Address
|
||||
rng.Read(addr[:])
|
||||
|
||||
acc := &types.StateAccount{
|
||||
Nonce: uint64(rng.Intn(1000)),
|
||||
Balance: new(uint256.Int).Mul(uint256.NewInt(uint64(rng.Intn(1000))), uint256.NewInt(1e18)),
|
||||
CodeHash: emptyCodeHash[:],
|
||||
}
|
||||
ops = append(ops, StateOp{
|
||||
Kind: OpUpdateAccount,
|
||||
Address: addr,
|
||||
Account: acc,
|
||||
CodeLen: 0,
|
||||
})
|
||||
}
|
||||
|
||||
// Contract accounts with storage and code.
|
||||
slotDist := generateSlotDistribution(rng, cfg)
|
||||
|
||||
for i := range cfg.NumContracts {
|
||||
var addr common.Address
|
||||
rng.Read(addr[:])
|
||||
|
||||
// Generate code.
|
||||
codeSize := cfg.CodeSize + rng.Intn(max(cfg.CodeSize, 1))
|
||||
code := make([]byte, codeSize)
|
||||
rng.Read(code)
|
||||
|
||||
acc := &types.StateAccount{
|
||||
Nonce: uint64(rng.Intn(1000)),
|
||||
Balance: new(uint256.Int).Mul(uint256.NewInt(uint64(rng.Intn(100))), uint256.NewInt(1e18)),
|
||||
CodeHash: emptyCodeHash[:],
|
||||
}
|
||||
|
||||
// Account update (with code length for basicData encoding).
|
||||
ops = append(ops, StateOp{
|
||||
Kind: OpUpdateAccount,
|
||||
Address: addr,
|
||||
Account: acc,
|
||||
CodeLen: codeSize,
|
||||
})
|
||||
|
||||
// Code update.
|
||||
ops = append(ops, StateOp{
|
||||
Kind: OpUpdateCode,
|
||||
Address: addr,
|
||||
Code: code,
|
||||
})
|
||||
|
||||
// Storage slots.
|
||||
numSlots := slotDist[i]
|
||||
for range numSlots {
|
||||
slot := make([]byte, 32)
|
||||
rng.Read(slot)
|
||||
val := make([]byte, 32)
|
||||
rng.Read(val)
|
||||
// Ensure non-zero value (matches state-actor behavior).
|
||||
if val[0] == 0 && val[31] == 0 {
|
||||
val[0] = 0x01
|
||||
}
|
||||
ops = append(ops, StateOp{
|
||||
Kind: OpUpdateStorage,
|
||||
Address: addr,
|
||||
Slot: slot,
|
||||
Value: val,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return ops
|
||||
}
|
||||
|
||||
// generateMutations creates incremental state changes on a subset of
|
||||
// addresses from block 0. Modifies ~10% of accounts with nonce bumps,
|
||||
// balance changes, and new storage slots.
|
||||
func generateMutations(rng *mrand.Rand, cfg StateGenConfig, block0 []StateOp) []StateOp {
|
||||
// Collect unique addresses from block 0.
|
||||
addrSet := make(map[common.Address]bool, cfg.NumAccounts+cfg.NumContracts)
|
||||
for i := range block0 {
|
||||
if block0[i].Kind == OpUpdateAccount {
|
||||
addrSet[block0[i].Address] = true
|
||||
}
|
||||
}
|
||||
addrs := make([]common.Address, 0, len(addrSet))
|
||||
for addr := range addrSet {
|
||||
addrs = append(addrs, addr)
|
||||
}
|
||||
// Sort for deterministic iteration (map order is random in Go).
|
||||
sort.Slice(addrs, func(i, j int) bool {
|
||||
return bytes.Compare(addrs[i][:], addrs[j][:]) < 0
|
||||
})
|
||||
|
||||
// Mutate ~10% of addresses.
|
||||
numMutations := max(len(addrs)/10, 1)
|
||||
ops := make([]StateOp, 0, numMutations*2)
|
||||
|
||||
emptyCodeHash := common.HexToHash(
|
||||
"c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470",
|
||||
)
|
||||
|
||||
for range numMutations {
|
||||
addr := addrs[rng.Intn(len(addrs))]
|
||||
|
||||
// Nonce bump + balance change.
|
||||
acc := &types.StateAccount{
|
||||
Nonce: uint64(1000 + rng.Intn(1000)),
|
||||
Balance: new(uint256.Int).Mul(uint256.NewInt(uint64(rng.Intn(500))), uint256.NewInt(1e18)),
|
||||
CodeHash: emptyCodeHash[:],
|
||||
}
|
||||
ops = append(ops, StateOp{
|
||||
Kind: OpUpdateAccount,
|
||||
Address: addr,
|
||||
Account: acc,
|
||||
CodeLen: 0,
|
||||
})
|
||||
|
||||
// Add a new storage slot.
|
||||
slot := make([]byte, 32)
|
||||
rng.Read(slot)
|
||||
val := make([]byte, 32)
|
||||
rng.Read(val)
|
||||
if val[0] == 0 && val[31] == 0 {
|
||||
val[0] = 0x01
|
||||
}
|
||||
ops = append(ops, StateOp{
|
||||
Kind: OpUpdateStorage,
|
||||
Address: addr,
|
||||
Slot: slot,
|
||||
Value: val,
|
||||
})
|
||||
}
|
||||
|
||||
return ops
|
||||
}
|
||||
|
||||
// generateSlotDistribution returns the number of storage slots for each
|
||||
// contract based on the configured distribution strategy.
|
||||
// Ported from state-actor/generator/generator.go:1056-1092.
|
||||
func generateSlotDistribution(rng *mrand.Rand, cfg StateGenConfig) []int {
|
||||
dist := make([]int, cfg.NumContracts)
|
||||
|
||||
switch cfg.Distribution {
|
||||
case PowerLaw:
|
||||
alpha := 1.5
|
||||
for i := range dist {
|
||||
u := rng.Float64()
|
||||
slots := float64(cfg.MinSlots) / math.Pow(1-u, 1/alpha)
|
||||
if slots > float64(cfg.MaxSlots) {
|
||||
slots = float64(cfg.MaxSlots)
|
||||
}
|
||||
dist[i] = int(slots)
|
||||
}
|
||||
|
||||
case Exponential:
|
||||
lambda := math.Log(2) / float64(cfg.MaxSlots/4)
|
||||
for i := range dist {
|
||||
u := rng.Float64()
|
||||
slots := -math.Log(1-u) / lambda
|
||||
slots = math.Max(float64(cfg.MinSlots), math.Min(slots, float64(cfg.MaxSlots)))
|
||||
dist[i] = int(slots)
|
||||
}
|
||||
|
||||
case Uniform:
|
||||
for i := range dist {
|
||||
dist[i] = cfg.MinSlots + rng.Intn(cfg.MaxSlots-cfg.MinSlots+1)
|
||||
}
|
||||
}
|
||||
|
||||
return dist
|
||||
}
|
||||
Loading…
Reference in a new issue