nomt: add triecompare package and fix sort.SliceStable bug in stem grouping

Add trie/triecompare/ package with realistic state generation and cross-validation tests proving that NOMT produces the same roots as bintrie at scale (10K+ accounts, PowerLaw/Uniform/Exponential distributions, multi-block). Fix a subtle bug in groupAndHashStems: sort.Slice was used instead of sort.SliceStable, causing non-deterministic results when the same account is mutated twice in a single block (duplicate stem+suffix entries must keep their insertion order so that the last writer wins). Tests: 5 correctness tests + 4 benchmarks + storage footprint comparison. All pass with race detector clean. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-06-19 21:31:37 +00:00 · 2026-02-13 00:07:04 +08:00 · 2026-02-13 00:07:04 +08:00 · d61dd875d8
commit d61dd875d8
parent 4a2a10ca7d
3 changed files with 788 additions and 2 deletions
--- a/trie/nomttrie/stem.go
+++ b/trie/nomttrie/stem.go
@ -86,8 +86,9 @@ func groupAndHashStems(
 		return nil, nil
 	}

-	// Sort by stem, then suffix.
-	sort.Slice(updates, func(i, j int) bool {
+	// Stable sort by stem then suffix to preserve insertion order for
+	// duplicate (stem, suffix) pairs — the last queued value must win.
+	sort.SliceStable(updates, func(i, j int) bool {
 		if updates[i].Stem != updates[j].Stem {
 			return stemLess(&updates[i].Stem, &updates[j].Stem)
 		}
--- a/trie/triecompare/compare_test.go
+++ b/trie/triecompare/compare_test.go
@ -0,0 +1,522 @@
+package triecompare
+
+import (
+	"fmt"
+	"math/bits"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/core/rawdb"
+	"github.com/ethereum/go-ethereum/core/types"
+	"github.com/ethereum/go-ethereum/trie/bintrie"
+	"github.com/ethereum/go-ethereum/trie/nomttrie"
+	"github.com/ethereum/go-ethereum/trie/trienode"
+	"github.com/ethereum/go-ethereum/triedb"
+	"github.com/ethereum/go-ethereum/triedb/nomtdb"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// ---------------------------------------------------------------------------
+// Test helpers
+// ---------------------------------------------------------------------------
+
+// newBintrie returns an empty binary trie backed by an in-memory database.
+// The trie database is closed automatically when the test or benchmark ends.
+func newBintrie(t testing.TB) *bintrie.BinaryTrie {
+	t.Helper()
+
+	memdb := rawdb.NewMemoryDatabase()
+	db := triedb.NewDatabase(memdb, nil)
+	t.Cleanup(func() { db.Close() })
+
+	trie, err := bintrie.NewBinaryTrie(types.EmptyRootHash, db)
+	require.NoError(t, err)
+	return trie
+}
+
+// newNomtTrieWithDir opens an empty NOMT trie whose backend stores its files
+// in a fresh per-test temporary directory. It returns the trie together with
+// that directory so callers can inspect the on-disk footprint. The backend is
+// closed automatically when the test or benchmark ends.
+func newNomtTrieWithDir(t testing.TB, htCapacity uint64) (*nomttrie.NomtTrie, string) {
+	t.Helper()
+
+	memdb := rawdb.NewMemoryDatabase()
+	dataDir := t.TempDir()
+	cfg := &nomtdb.Config{
+		DataDir:    dataDir,
+		HTCapacity: htCapacity,
+	}
+	backend := nomtdb.New(memdb, cfg)
+	t.Cleanup(func() { backend.Close() })
+
+	trie, err := nomttrie.New(common.Hash{}, backend)
+	require.NoError(t, err)
+	return trie, dataDir
+}
+
+// applyOp applies a single StateOp to both bintrie and nomttrie so the two
+// tries stay in lockstep. The test fails immediately if either trie returns
+// an error or if op.Kind is not one of the known OpKind values.
+//
+// Code updates pass a zero code hash to UpdateContractCode on both tries.
+func applyOp(t testing.TB, bt *bintrie.BinaryTrie, nt *nomttrie.NomtTrie, op StateOp) {
+	t.Helper()
+	switch op.Kind {
+	case OpUpdateAccount:
+		require.NoError(t, bt.UpdateAccount(op.Address, op.Account, op.CodeLen))
+		require.NoError(t, nt.UpdateAccount(op.Address, op.Account, op.CodeLen))
+	case OpUpdateStorage:
+		require.NoError(t, bt.UpdateStorage(op.Address, op.Slot, op.Value))
+		require.NoError(t, nt.UpdateStorage(op.Address, op.Slot, op.Value))
+	case OpUpdateCode:
+		require.NoError(t, bt.UpdateContractCode(op.Address, common.Hash{}, op.Code))
+		require.NoError(t, nt.UpdateContractCode(op.Address, common.Hash{}, op.Code))
+	default:
+		// A silently skipped op would leave both tries untouched and make
+		// the root comparison pass trivially, so treat it as a test bug.
+		t.Fatalf("unknown op kind %d", op.Kind)
+	}
+}
+
+// ---------------------------------------------------------------------------
+// Test configurations
+// ---------------------------------------------------------------------------
+
+// smallConfig is the quick workload used by short-mode tests and benchmarks.
+var smallConfig = StateGenConfig{
+	Seed:         42,
+	Distribution: PowerLaw,
+	NumAccounts:  100,
+	NumContracts: 50,
+	MinSlots:     1,
+	MaxSlots:     20,
+	CodeSize:     128,
+}
+
+// mediumConfig is ten times smallConfig's account and contract counts.
+var mediumConfig = StateGenConfig{
+	Seed:         42,
+	Distribution: PowerLaw,
+	NumAccounts:  1_000,
+	NumContracts: 500,
+	MinSlots:     1,
+	MaxSlots:     100,
+	CodeSize:     256,
+}
+
+// largeConfig is the at-scale workload: 10K EOAs plus 5K contracts.
+var largeConfig = StateGenConfig{
+	Seed:         42,
+	Distribution: PowerLaw,
+	NumAccounts:  10_000,
+	NumContracts: 5_000,
+	MinSlots:     1,
+	MaxSlots:     500,
+	CodeSize:     512,
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+// TestRootEquality generates realistic state at various sizes and verifies
+// that bintrie and NOMT produce identical state roots after each block.
+//
+// Only the Small configuration runs in -short mode. Cases are kept in an
+// ordered slice (not a map) so subtests run, and log, in a stable order.
+func TestRootEquality(t *testing.T) {
+	type testCase struct {
+		name string
+		cfg  StateGenConfig
+	}
+	cases := []testCase{
+		{"Small", smallConfig},
+	}
+	if !testing.Short() {
+		cases = append(cases,
+			testCase{"Medium", mediumConfig},
+			testCase{"Large", largeConfig},
+		)
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			cfg := tc.cfg
+			blocks := GenerateBlocks(cfg)
+			htCap := estimateHTCapacity(cfg.NumAccounts, cfg.NumContracts, (cfg.MinSlots+cfg.MaxSlots)/2)
+
+			bt := newBintrie(t)
+			nt, _ := newNomtTrieWithDir(t, htCap)
+
+			for blockIdx, ops := range blocks {
+				for _, op := range ops {
+					applyOp(t, bt, nt, op)
+				}
+				binRoot := bt.Hash()
+				nomtRoot := nt.Hash()
+
+				t.Logf("block %d: %d ops, bintrie=%x nomt=%x",
+					blockIdx, len(ops), binRoot[:8], nomtRoot[:8])
+
+				// A zero bintrie root would mean nothing was inserted,
+				// which would make the equality check below vacuous.
+				assert.NotEqual(t, common.Hash{}, binRoot,
+					"bintrie root should be non-zero at block %d", blockIdx)
+				assert.Equal(t, binRoot, nomtRoot,
+					"root mismatch at block %d", blockIdx)
+			}
+		})
+	}
+}
+
+// TestDeterminism replays the smallConfig workload twice, each time on fresh
+// tries, and checks that NOMT ends on the same root both times.
+func TestDeterminism(t *testing.T) {
+	// run generates the blocks from scratch, applies them to a new pair of
+	// tries and returns the NOMT root after the final block.
+	run := func() common.Hash {
+		blocks := GenerateBlocks(smallConfig)
+		avgSlots := (smallConfig.MinSlots + smallConfig.MaxSlots) / 2
+		htCap := estimateHTCapacity(smallConfig.NumAccounts, smallConfig.NumContracts, avgSlots)
+
+		nt, _ := newNomtTrieWithDir(t, htCap)
+		bt := newBintrie(t)
+
+		var last common.Hash
+		for _, ops := range blocks {
+			for _, op := range ops {
+				applyOp(t, bt, nt, op)
+			}
+			last = nt.Hash()
+			bt.Hash() // flush bintrie too
+		}
+		return last
+	}
+
+	first := run()
+	second := run()
+	assert.Equal(t, first, second, "same seed must produce same root")
+}
+
+// TestDistributionVariants replays the Small workload once per slot
+// distribution (all with seed 123) and checks that bintrie and NOMT agree on
+// the root after the final block.
+func TestDistributionVariants(t *testing.T) {
+	type variant struct {
+		name string
+		dist Distribution
+	}
+	variants := []variant{
+		{name: "PowerLaw", dist: PowerLaw},
+		{name: "Uniform", dist: Uniform},
+		{name: "Exponential", dist: Exponential},
+	}
+
+	for _, v := range variants {
+		t.Run(v.name, func(t *testing.T) {
+			cfg := smallConfig
+			cfg.Seed = 123 // same seed for all
+			cfg.Distribution = v.dist
+
+			blocks := GenerateBlocks(cfg)
+			avgSlots := (cfg.MinSlots + cfg.MaxSlots) / 2
+			htCap := estimateHTCapacity(cfg.NumAccounts, cfg.NumContracts, avgSlots)
+
+			bt := newBintrie(t)
+			nt, _ := newNomtTrieWithDir(t, htCap)
+
+			// Hash after every block; only the last pair is compared.
+			var binRoot, nomtRoot common.Hash
+			for _, ops := range blocks {
+				for _, op := range ops {
+					applyOp(t, bt, nt, op)
+				}
+				binRoot, nomtRoot = bt.Hash(), nt.Hash()
+			}
+
+			t.Logf("dist=%s bintrie=%x nomt=%x", v.name, binRoot[:8], nomtRoot[:8])
+			assert.Equal(t, binRoot, nomtRoot,
+				"root mismatch with %s distribution", v.name)
+		})
+	}
+}
+
+// TestIncrementalRootEquality compares the two roots after every individual
+// operation of the first block, so an ordering-sensitive divergence is
+// reported at the exact op that introduced it. Skipped in -short mode.
+func TestIncrementalRootEquality(t *testing.T) {
+	if testing.Short() {
+		t.Skip("incremental test is slow")
+	}
+
+	// Hashing once per op is expensive, so the state is kept tiny.
+	cfg := StateGenConfig{
+		Seed:         99,
+		Distribution: Uniform,
+		NumAccounts:  20,
+		NumContracts: 10,
+		MinSlots:     1,
+		MaxSlots:     5,
+		CodeSize:     64,
+	}
+	firstBlock := GenerateBlocks(cfg)[0]
+	htCap := estimateHTCapacity(cfg.NumAccounts, cfg.NumContracts, 3)
+
+	bt := newBintrie(t)
+	nt, _ := newNomtTrieWithDir(t, htCap)
+
+	for i, op := range firstBlock {
+		applyOp(t, bt, nt, op)
+		binRoot, nomtRoot := bt.Hash(), nt.Hash()
+		if binRoot == nomtRoot {
+			continue
+		}
+		t.Fatalf("root mismatch at op %d (kind=%d addr=%x): bin=%x nomt=%x",
+			i, op.Kind, op.Address[:4], binRoot[:8], nomtRoot[:8])
+	}
+	t.Logf("verified %d incremental hashes match", len(firstBlock))
+}
+
+// TestStorageFootprint applies the medium workload to both tries and logs
+// how many bytes each one needs: the serialized node blobs returned by the
+// bintrie Commit versus the files found in the NOMT data directory. It only
+// asserts that the roots match; the sizes are informational.
+func TestStorageFootprint(t *testing.T) {
+	if testing.Short() {
+		t.Skip("storage footprint test requires medium config")
+	}
+
+	cfg := mediumConfig
+	avgSlots := (cfg.MinSlots + cfg.MaxSlots) / 2
+	htCap := estimateHTCapacity(cfg.NumAccounts, cfg.NumContracts, avgSlots)
+
+	bt := newBintrie(t)
+	nt, nomtDir := newNomtTrieWithDir(t, htCap)
+
+	for _, ops := range GenerateBlocks(cfg) {
+		for _, op := range ops {
+			applyOp(t, bt, nt, op)
+		}
+	}
+
+	// Hash both sides before measuring, and refuse to compare footprints of
+	// states that disagree.
+	binRoot := bt.Hash()
+	nomtRoot := nt.Hash()
+	require.Equal(t, binRoot, nomtRoot, "roots must match before measuring storage")
+
+	// Bintrie side: total size of the node blobs produced by Commit.
+	_, nodes := bt.Commit(false)
+	binBytes := nodesetBytes(nodes)
+
+	// NOMT side: total size of the files under its data directory.
+	nomtBytes := dirSize(t, nomtDir)
+
+	// Clamp the divisor so an empty node set cannot divide by zero.
+	divisor := max(binBytes, 1)
+	ratio := float64(nomtBytes) / float64(divisor)
+
+	t.Logf("bintrie serialized nodes: %s (%d bytes)", humanBytes(binBytes), binBytes)
+	t.Logf("NOMT bitbox on disk:      %s (%d bytes)", humanBytes(nomtBytes), nomtBytes)
+	t.Logf("NOMT / bintrie ratio:     %.2fx", ratio)
+}
+
+// ---------------------------------------------------------------------------
+// Benchmarks
+// ---------------------------------------------------------------------------
+
+// BenchmarkUpdateAccount measures the cost of a single UpdateAccount call on
+// each trie, cycling through the account updates of smallConfig's first
+// block. No hashing happens inside the timed loop.
+func BenchmarkUpdateAccount(b *testing.B) {
+	cfg := smallConfig
+	blocks := GenerateBlocks(cfg)
+	ops := filterOps(blocks[0], OpUpdateAccount)
+	if len(ops) == 0 {
+		// i%len(ops) below would panic with a division by zero.
+		b.Fatal("generated block contains no account updates")
+	}
+	htCap := estimateHTCapacity(cfg.NumAccounts, cfg.NumContracts, 10)
+
+	b.Run("bintrie", func(b *testing.B) {
+		bt := newBintrie(b)
+		b.ResetTimer()
+		for i := range b.N {
+			op := ops[i%len(ops)]
+			// Timing failed updates would be meaningless, so abort on error.
+			if err := bt.UpdateAccount(op.Address, op.Account, op.CodeLen); err != nil {
+				b.Fatal(err)
+			}
+		}
+	})
+
+	b.Run("nomt", func(b *testing.B) {
+		nt, _ := newNomtTrieWithDir(b, htCap)
+		b.ResetTimer()
+		for i := range b.N {
+			op := ops[i%len(ops)]
+			if err := nt.UpdateAccount(op.Address, op.Account, op.CodeLen); err != nil {
+				b.Fatal(err)
+			}
+		}
+	})
+}
+
+// BenchmarkUpdateStorage measures the cost of a single UpdateStorage call on
+// each trie, cycling through the storage writes of smallConfig's first block.
+// No hashing happens inside the timed loop.
+func BenchmarkUpdateStorage(b *testing.B) {
+	cfg := smallConfig
+	blocks := GenerateBlocks(cfg)
+	ops := filterOps(blocks[0], OpUpdateStorage)
+	if len(ops) == 0 {
+		// i%len(ops) below would panic with a division by zero.
+		b.Fatal("generated block contains no storage updates")
+	}
+	htCap := estimateHTCapacity(cfg.NumAccounts, cfg.NumContracts, 10)
+
+	b.Run("bintrie", func(b *testing.B) {
+		bt := newBintrie(b)
+		b.ResetTimer()
+		for i := range b.N {
+			op := ops[i%len(ops)]
+			// Timing failed updates would be meaningless, so abort on error.
+			if err := bt.UpdateStorage(op.Address, op.Slot, op.Value); err != nil {
+				b.Fatal(err)
+			}
+		}
+	})
+
+	b.Run("nomt", func(b *testing.B) {
+		nt, _ := newNomtTrieWithDir(b, htCap)
+		b.ResetTimer()
+		for i := range b.N {
+			op := ops[i%len(ops)]
+			if err := nt.UpdateStorage(op.Address, op.Slot, op.Value); err != nil {
+				b.Fatal(err)
+			}
+		}
+	})
+}
+
+// BenchmarkHash measures re-hashing after a single-account change on tries
+// pre-populated with 100, 1000 and 10000 EOAs. Each timed iteration bumps the
+// nonce of the first generated account, writes it back and calls Hash.
+//
+// The nonce is bumped on a private copy of the account. StateOp.Account is a
+// pointer, so incrementing it in place would modify the shared generated
+// block and let one sub-benchmark (or an earlier b.N round) change the
+// initial state seen by the next.
+func BenchmarkHash(b *testing.B) {
+	for _, size := range []int{100, 1000, 10000} {
+		b.Run(fmt.Sprintf("size=%d", size), func(b *testing.B) {
+			cfg := StateGenConfig{
+				NumAccounts:  size,
+				NumContracts: 0,
+				MinSlots:     0,
+				MaxSlots:     0,
+				CodeSize:     0,
+				Distribution: Uniform,
+				Seed:         77,
+			}
+			blocks := GenerateBlocks(cfg)
+			htCap := estimateHTCapacity(size, 0, 0)
+			target := blocks[0][0] // account that gets dirtied every iteration
+
+			b.Run("bintrie", func(b *testing.B) {
+				bt := newBintrie(b)
+				for _, op := range blocks[0] {
+					_ = bt.UpdateAccount(op.Address, op.Account, op.CodeLen)
+				}
+				bt.Hash() // baseline
+
+				acc := *target.Account // private copy; see function comment
+				b.ResetTimer()
+				for range b.N {
+					// Modify one account to dirty the trie.
+					acc.Nonce++
+					_ = bt.UpdateAccount(target.Address, &acc, target.CodeLen)
+					bt.Hash()
+				}
+			})
+
+			b.Run("nomt", func(b *testing.B) {
+				nt, _ := newNomtTrieWithDir(b, htCap)
+				for _, op := range blocks[0] {
+					_ = nt.UpdateAccount(op.Address, op.Account, op.CodeLen)
+				}
+				nt.Hash() // baseline
+
+				acc := *target.Account // private copy; see function comment
+				b.ResetTimer()
+				for range b.N {
+					acc.Nonce++
+					_ = nt.UpdateAccount(target.Address, &acc, target.CodeLen)
+					nt.Hash()
+				}
+			})
+		})
+	}
+}
+
+// BenchmarkBlockWorkload times a whole block on each trie: every iteration
+// re-applies all operations of smallConfig's mutation block and then hashes.
+// The initial state (block 0) is loaded and hashed before the timer starts.
+func BenchmarkBlockWorkload(b *testing.B) {
+	cfg := smallConfig
+	blocks := GenerateBlocks(cfg)
+	htCap := estimateHTCapacity(cfg.NumAccounts, cfg.NumContracts, 10)
+
+	// Block 0 seeds the tries; block 1 (mutations) is the repeated workload.
+	initial, workload := blocks[0], blocks[1]
+
+	b.Run("bintrie", func(b *testing.B) {
+		bt := newBintrie(b)
+		for _, op := range initial {
+			applyOpSingle(b, bt, op)
+		}
+		bt.Hash()
+
+		b.ResetTimer()
+		for range b.N {
+			for _, op := range workload {
+				applyOpSingle(b, bt, op)
+			}
+			bt.Hash()
+		}
+	})
+
+	b.Run("nomt", func(b *testing.B) {
+		nt, _ := newNomtTrieWithDir(b, htCap)
+		for _, op := range initial {
+			applyOpSingleNomt(b, nt, op)
+		}
+		nt.Hash()
+
+		b.ResetTimer()
+		for range b.N {
+			for _, op := range workload {
+				applyOpSingleNomt(b, nt, op)
+			}
+			nt.Hash()
+		}
+	})
+}
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+// applyOpSingle applies a StateOp to a bintrie only (for benchmarks).
+// An update error fails the benchmark instead of being silently discarded,
+// so failing operations are never timed as if they had succeeded.
+func applyOpSingle(t testing.TB, bt *bintrie.BinaryTrie, op StateOp) {
+	t.Helper()
+	switch op.Kind {
+	case OpUpdateAccount:
+		require.NoError(t, bt.UpdateAccount(op.Address, op.Account, op.CodeLen))
+	case OpUpdateStorage:
+		require.NoError(t, bt.UpdateStorage(op.Address, op.Slot, op.Value))
+	case OpUpdateCode:
+		require.NoError(t, bt.UpdateContractCode(op.Address, common.Hash{}, op.Code))
+	}
+}
+
+// applyOpSingleNomt applies a StateOp to a NomtTrie only (for benchmarks).
+// An update error fails the benchmark instead of being silently discarded,
+// so failing operations are never timed as if they had succeeded.
+func applyOpSingleNomt(t testing.TB, nt *nomttrie.NomtTrie, op StateOp) {
+	t.Helper()
+	switch op.Kind {
+	case OpUpdateAccount:
+		require.NoError(t, nt.UpdateAccount(op.Address, op.Account, op.CodeLen))
+	case OpUpdateStorage:
+		require.NoError(t, nt.UpdateStorage(op.Address, op.Slot, op.Value))
+	case OpUpdateCode:
+		require.NoError(t, nt.UpdateContractCode(op.Address, common.Hash{}, op.Code))
+	}
+}
+
+// filterOps returns, in their original order, the operations whose Kind
+// equals kind. The result is nil when nothing matches.
+func filterOps(ops []StateOp, kind OpKind) []StateOp {
+	var matched []StateOp
+	for _, op := range ops {
+		if op.Kind != kind {
+			continue
+		}
+		matched = append(matched, op)
+	}
+	return matched
+}
+
+// nodesetBytes adds up the blob lengths of every node in ns.
+// A nil node set counts as zero bytes.
+func nodesetBytes(ns *trienode.NodeSet) int64 {
+	var size int64
+	if ns != nil {
+		for _, n := range ns.Nodes {
+			size += int64(len(n.Blob))
+		}
+	}
+	return size
+}
+
+// dirSize returns the combined size in bytes of every non-directory entry
+// found while walking dir. Any walk error fails the test.
+func dirSize(t testing.TB, dir string) int64 {
+	t.Helper()
+
+	var sum int64
+	visit := func(_ string, fi os.FileInfo, walkErr error) error {
+		switch {
+		case walkErr != nil:
+			return walkErr
+		case fi.IsDir():
+			return nil
+		}
+		sum += fi.Size()
+		return nil
+	}
+
+	walkErr := filepath.Walk(dir, visit)
+	require.NoError(t, walkErr)
+	return sum
+}
+
+// estimateHTCapacity sizes the hash table for roughly 50% load and returns
+// the result rounded up to a power of two, never less than 64.
+//
+// The stem estimate is one stem per account and per contract, plus
+// ceil(avgSlots/256) storage stems per contract when avgSlots is positive.
+func estimateHTCapacity(numAccounts, numContracts, avgSlots int) uint64 {
+	const (
+		slotsPerStem = 256
+		minCapacity  = 64
+	)
+
+	stemCount := numAccounts + numContracts
+	if avgSlots > 0 {
+		storageStems := (avgSlots + slotsPerStem - 1) / slotsPerStem
+		stemCount += numContracts * storageStems
+	}
+
+	// Doubling the stem count gives the 50% load factor.
+	want := uint64(stemCount * 2)
+	if want < minCapacity {
+		want = minCapacity
+	}
+	// bits.Len64(want-1) is the exponent of the smallest power of two >= want.
+	return uint64(1) << bits.Len64(want-1)
+}
+
+// humanBytes renders a byte count for log output: MiB with one decimal from
+// 1 MiB upwards, KiB with one decimal from 1 KiB upwards, plain bytes below.
+func humanBytes(b int64) string {
+	const (
+		kib = 1 << 10
+		mib = 1 << 20
+	)
+	if b >= mib {
+		return fmt.Sprintf("%.1f MiB", float64(b)/mib)
+	}
+	if b >= kib {
+		return fmt.Sprintf("%.1f KiB", float64(b)/kib)
+	}
+	return fmt.Sprintf("%d B", b)
+}
--- a/trie/triecompare/stategen.go
+++ b/trie/triecompare/stategen.go
@ -0,0 +1,263 @@
+// Package triecompare provides realistic Ethereum state generation and
+// comparison tests between bintrie and NOMT trie implementations.
+//
+// The state generation logic is ported from the state-actor repository's
+// generator patterns, using PowerLaw/Uniform/Exponential distributions
+// to mimic mainnet-like storage slot distributions.
+package triecompare
+
+import (
+	"bytes"
+	"math"
+	mrand "math/rand"
+	"sort"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/core/types"
+	"github.com/holiman/uint256"
+)
+
+// Distribution represents the storage slot distribution strategy, i.e. how
+// the per-contract slot count is drawn by generateSlotDistribution.
+// PowerLaw is the zero value.
+type Distribution int
+
+const (
+	// PowerLaw distribution — most contracts have few slots, few have many.
+	// Mimics real Ethereum where contracts like Uniswap have millions of
+	// slots while most have very few. Uses Pareto inverse CDF (alpha=1.5),
+	// scaled by MinSlots and capped at MaxSlots.
+	PowerLaw Distribution = iota
+
+	// Uniform distribution — every contract draws its slot count uniformly
+	// from MinSlots to MaxSlots inclusive.
+	Uniform
+
+	// Exponential distribution — exponential decay in slot counts, clamped
+	// to the range MinSlots to MaxSlots.
+	Exponential
+)
+
+// StateGenConfig configures synthetic state generation. The same config
+// (including Seed) always yields the same blocks from GenerateBlocks.
+type StateGenConfig struct {
+	NumAccounts  int          // Number of EOA accounts
+	NumContracts int          // Number of contract accounts
+	MinSlots     int          // Minimum storage slots per contract
+	MaxSlots     int          // Maximum storage slots per contract
+	CodeSize     int          // Minimum contract code size in bytes; a random extra below max(CodeSize, 1) is added per contract
+	Distribution Distribution // Slot distribution strategy
+	Seed         int64        // Deterministic random seed
+}
+
+// OpKind discriminates between state operation types and determines which
+// StateOp fields are populated.
+type OpKind int
+
+const (
+	// OpUpdateAccount writes an account; uses StateOp.Account and CodeLen.
+	OpUpdateAccount OpKind = iota
+	// OpUpdateStorage writes one storage slot; uses StateOp.Slot and Value.
+	OpUpdateStorage
+	// OpUpdateCode writes contract code; uses StateOp.Code.
+	OpUpdateCode
+)
+
+// StateOp represents a single state operation to apply to a trie.
+// Address is always set; the remaining fields are only meaningful for the
+// Kind noted on each of them.
+type StateOp struct {
+	Kind    OpKind
+	Address common.Address
+	Account *types.StateAccount // populated for OpUpdateAccount
+	CodeLen int                 // code length for OpUpdateAccount
+	Code    []byte              // populated for OpUpdateCode
+	Slot    []byte              // 32-byte key for OpUpdateStorage
+	Value   []byte              // raw 32-byte value for OpUpdateStorage
+}
+
+// GenerateBlocks returns two blocks of state operations derived solely from
+// cfg, so equal configs always yield equal blocks:
+//
+//	index 0 — initial state creation (all accounts, storage, code)
+//	index 1 — incremental mutations (account rewrites plus new storage slots)
+//
+// Both blocks draw from one RNG seeded with cfg.Seed, in that order.
+func GenerateBlocks(cfg StateGenConfig) [][]StateOp {
+	source := mrand.NewSource(cfg.Seed)
+	rng := mrand.New(source)
+
+	initial := generateInitialState(rng, cfg)
+	// Mutations target addresses created by the initial block.
+	mutations := generateMutations(rng, cfg, initial)
+
+	return [][]StateOp{initial, mutations}
+}
+
+// generateInitialState builds block 0: one account update per EOA, followed
+// by, for each contract, an account update, a code update and one storage
+// update per slot assigned by generateSlotDistribution.
+//
+// Every account, contracts included, carries the empty-code hash as its
+// CodeHash. Random values are drawn from rng in a fixed order, so the output
+// depends only on the RNG state and cfg.
+func generateInitialState(rng *mrand.Rand, cfg StateGenConfig) []StateOp {
+	// Capacity hint only: one op per EOA, three per contract. Storage ops
+	// beyond that grow the slice as needed.
+	ops := make([]StateOp, 0, cfg.NumAccounts+3*cfg.NumContracts)
+
+	emptyCodeHash := common.HexToHash(
+		"c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470",
+	)
+
+	// randomBytes returns n bytes read from rng.
+	randomBytes := func(n int) []byte {
+		buf := make([]byte, n)
+		rng.Read(buf)
+		return buf
+	}
+	// randomAddress draws a fresh address from rng.
+	randomAddress := func() common.Address {
+		var a common.Address
+		rng.Read(a[:])
+		return a
+	}
+	// randomAccount draws a nonce below 1000 and then a balance of
+	// [0, balanceLimit) * 1e18, in that order.
+	randomAccount := func(balanceLimit int) *types.StateAccount {
+		nonce := uint64(rng.Intn(1000))
+		units := uint256.NewInt(uint64(rng.Intn(balanceLimit)))
+		return &types.StateAccount{
+			Nonce:    nonce,
+			Balance:  new(uint256.Int).Mul(units, uint256.NewInt(1e18)),
+			CodeHash: emptyCodeHash[:],
+		}
+	}
+
+	// Plain accounts without code or storage.
+	for range cfg.NumAccounts {
+		addr := randomAddress()
+		ops = append(ops, StateOp{
+			Kind:    OpUpdateAccount,
+			Address: addr,
+			Account: randomAccount(1000),
+			CodeLen: 0,
+		})
+	}
+
+	// Contracts: slot counts are drawn up front, one entry per contract.
+	slotCounts := generateSlotDistribution(rng, cfg)
+
+	for _, numSlots := range slotCounts {
+		addr := randomAddress()
+
+		// Code length is CodeSize plus a random extra below max(CodeSize, 1).
+		codeSize := cfg.CodeSize + rng.Intn(max(cfg.CodeSize, 1))
+		code := randomBytes(codeSize)
+
+		// The account op carries the code length for basicData encoding.
+		ops = append(ops,
+			StateOp{
+				Kind:    OpUpdateAccount,
+				Address: addr,
+				Account: randomAccount(100),
+				CodeLen: codeSize,
+			},
+			StateOp{
+				Kind:    OpUpdateCode,
+				Address: addr,
+				Code:    code,
+			},
+		)
+
+		for range numSlots {
+			slot := randomBytes(32)
+			val := randomBytes(32)
+			// Ensure non-zero value (matches state-actor behavior).
+			if val[0] == 0 && val[31] == 0 {
+				val[0] = 0x01
+			}
+			ops = append(ops, StateOp{
+				Kind:    OpUpdateStorage,
+				Address: addr,
+				Slot:    slot,
+				Value:   val,
+			})
+		}
+	}
+
+	return ops
+}
+
+// generateMutations creates incremental state changes on addresses taken
+// from block 0. It performs max(N/10, 1) mutations, where N is the number of
+// distinct addresses with an account update in block0. Each mutation picks a
+// random address and emits a rewritten account (nonce in [1000, 2000), new
+// balance) followed by a write to a fresh random storage slot.
+//
+// Addresses are drawn with replacement, so one address can be mutated more
+// than once within the block. If block0 contains no account updates there is
+// nothing to mutate and nil is returned.
+func generateMutations(rng *mrand.Rand, cfg StateGenConfig, block0 []StateOp) []StateOp {
+	// Collect unique addresses from block 0.
+	addrSet := make(map[common.Address]bool, cfg.NumAccounts+cfg.NumContracts)
+	for i := range block0 {
+		if block0[i].Kind == OpUpdateAccount {
+			addrSet[block0[i].Address] = true
+		}
+	}
+	if len(addrSet) == 0 {
+		// rng.Intn(0) below would panic; an empty state has no mutations.
+		return nil
+	}
+	addrs := make([]common.Address, 0, len(addrSet))
+	for addr := range addrSet {
+		addrs = append(addrs, addr)
+	}
+	// Sort for deterministic iteration (map order is random in Go).
+	sort.Slice(addrs, func(i, j int) bool {
+		return bytes.Compare(addrs[i][:], addrs[j][:]) < 0
+	})
+
+	// Mutate ~10% of addresses.
+	numMutations := max(len(addrs)/10, 1)
+	ops := make([]StateOp, 0, numMutations*2)
+
+	emptyCodeHash := common.HexToHash(
+		"c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470",
+	)
+
+	for range numMutations {
+		addr := addrs[rng.Intn(len(addrs))]
+
+		// Nonce bump + balance change.
+		acc := &types.StateAccount{
+			Nonce:    uint64(1000 + rng.Intn(1000)),
+			Balance:  new(uint256.Int).Mul(uint256.NewInt(uint64(rng.Intn(500))), uint256.NewInt(1e18)),
+			CodeHash: emptyCodeHash[:],
+		}
+		ops = append(ops, StateOp{
+			Kind:    OpUpdateAccount,
+			Address: addr,
+			Account: acc,
+			CodeLen: 0,
+		})
+
+		// Add a new storage slot.
+		slot := make([]byte, 32)
+		rng.Read(slot)
+		val := make([]byte, 32)
+		rng.Read(val)
+		// Ensure the stored value is non-zero.
+		if val[0] == 0 && val[31] == 0 {
+			val[0] = 0x01
+		}
+		ops = append(ops, StateOp{
+			Kind:    OpUpdateStorage,
+			Address: addr,
+			Slot:    slot,
+			Value:   val,
+		})
+	}
+
+	return ops
+}
+
+// generateSlotDistribution returns the number of storage slots for each
+// contract based on the configured distribution strategy. The result has
+// cfg.NumContracts entries; an unrecognized Distribution leaves them all 0.
+// Ported from state-actor/generator/generator.go:1056-1092.
+//
+// A misconfigured range (MaxSlots < MinSlots) no longer panics in the
+// Uniform case: the span is clamped to zero, so every contract receives
+// MinSlots.
+func generateSlotDistribution(rng *mrand.Rand, cfg StateGenConfig) []int {
+	dist := make([]int, cfg.NumContracts)
+
+	switch cfg.Distribution {
+	case PowerLaw:
+		// Pareto inverse CDF scaled by MinSlots, capped at MaxSlots.
+		alpha := 1.5
+		for i := range dist {
+			u := rng.Float64()
+			slots := float64(cfg.MinSlots) / math.Pow(1-u, 1/alpha)
+			if slots > float64(cfg.MaxSlots) {
+				slots = float64(cfg.MaxSlots)
+			}
+			dist[i] = int(slots)
+		}
+
+	case Exponential:
+		// NOTE(review): MaxSlots/4 is an integer division, so it truncates
+		// and is 0 for MaxSlots < 4 (lambda becomes +Inf and every contract
+		// ends up with MinSlots). Kept as in the original port.
+		lambda := math.Log(2) / float64(cfg.MaxSlots/4)
+		for i := range dist {
+			u := rng.Float64()
+			slots := -math.Log(1-u) / lambda
+			slots = math.Max(float64(cfg.MinSlots), math.Min(slots, float64(cfg.MaxSlots)))
+			dist[i] = int(slots)
+		}
+
+	case Uniform:
+		// rng.Intn panics on a non-positive argument, so clamp an inverted
+		// range. Intn is still called once per contract, which keeps the
+		// RNG stream unchanged for every valid configuration.
+		span := max(cfg.MaxSlots-cfg.MinSlots, 0)
+		for i := range dist {
+			dist[i] = cfg.MinSlots + rng.Intn(span+1)
+		}
+	}
+
+	return dist
+}