This commit is contained in:
CPerezz 2026-04-16 14:00:44 +05:30 committed by GitHub
commit 40224091fb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
53 changed files with 8158 additions and 1667 deletions

View file

@ -72,11 +72,10 @@ var (
accountReadTimer = metrics.NewRegisteredResettingTimer("chain/account/reads", nil)
accountHashTimer = metrics.NewRegisteredResettingTimer("chain/account/hashes", nil)
accountUpdateTimer = metrics.NewRegisteredResettingTimer("chain/account/updates", nil)
accountCommitTimer = metrics.NewRegisteredResettingTimer("chain/account/commits", nil)
hasherCommitTimer = metrics.NewRegisteredResettingTimer("chain/trie/commits", nil)
storageReadTimer = metrics.NewRegisteredResettingTimer("chain/storage/reads", nil)
storageUpdateTimer = metrics.NewRegisteredResettingTimer("chain/storage/updates", nil)
storageCommitTimer = metrics.NewRegisteredResettingTimer("chain/storage/commits", nil)
codeReadTimer = metrics.NewRegisteredResettingTimer("chain/code/reads", nil)
codeReadBytesTimer = metrics.NewRegisteredResettingTimer("chain/code/readbytes", nil)
@ -2112,30 +2111,53 @@ type ExecuteConfig struct {
// it writes the block and associated state to database.
func (bc *BlockChain) ProcessBlock(ctx context.Context, parentRoot common.Hash, block *types.Block, config ExecuteConfig) (result *blockProcessingResult, blockEndErr error) {
var (
err error
startTime = time.Now()
statedb *state.StateDB
interrupt atomic.Bool
sdb = state.NewDatabase(bc.triedb, bc.codedb).WithSnapshot(bc.snaps)
err error
startTime = time.Now()
interrupt atomic.Bool
sdb = state.NewDatabase(bc.triedb, bc.codedb).WithSnapshot(bc.snaps)
makeWitness bool
throwaway *state.StateDB // StateDB for speculative transaction pre-executor
statedb *state.StateDB // StateDB for sequential transaction executor
)
if bc.chainConfig.IsByzantium(block.Number()) && (config.StatelessSelfValidation || config.MakeWitness) {
makeWitness = true
}
defer interrupt.Store(true) // terminate the prefetch at the end
// Enable trie node prewarming after the Byzantium fork. Before that, state
// computation occurs at transaction boundaries, making prewarming ineffective.
// The read-only state should also be prewarmed to construct a comprehensive
// execution witness.
if bc.chainConfig.IsByzantium(block.Number()) {
sdb = sdb.EnablePrefetch(makeWitness)
// Explicitly terminate all the background prefetcher. This is essential
// to prevent goroutine leaks.
defer func() {
if statedb != nil {
statedb.StopPrefetcher()
}
if throwaway != nil {
throwaway.StopPrefetcher()
}
}()
}
if bc.cfg.NoPrefetch {
statedb, err = state.New(parentRoot, sdb)
if err != nil {
return nil, err
}
} else {
// If prefetching is enabled, run that against the current state to pre-cache
// transactions and probabilistically some of the account/storage trie nodes.
//
// Note: the main processor and prefetcher share the same reader with a local
// cache for mitigating the overhead of state access.
// If transaction prefetching is enabled, run that against the current state
// to pre-cache transactions. Note: the main processor and prefetcher share
// the same reader with a local cache for mitigating the overhead of state
// access.
prefetch, process, err := sdb.ReadersWithCacheStats(parentRoot)
if err != nil {
return nil, err
}
throwaway, err := state.NewWithReader(parentRoot, sdb, prefetch)
throwaway, err = state.NewWithReader(parentRoot, sdb, prefetch)
if err != nil {
return nil, err
}
@ -2171,20 +2193,16 @@ func (bc *BlockChain) ProcessBlock(ctx context.Context, parentRoot common.Hash,
// while processing transactions. Before Byzantium the prefetcher is mostly
// useless due to the intermediate root hashing after each transaction.
var witness *stateless.Witness
if bc.chainConfig.IsByzantium(block.Number()) {
if makeWitness {
// Generate witnesses either if we're self-testing, or if it's the
// only block being inserted. A bit crude, but witnesses are huge,
// so we refuse to make an entire chain of them.
if config.StatelessSelfValidation || config.MakeWitness {
witness, err = stateless.NewWitness(block.Header(), bc, config.EnableWitnessStats)
if err != nil {
return nil, err
}
witness, err = stateless.NewWitness(block.Header(), bc, config.EnableWitnessStats)
if err != nil {
return nil, err
}
statedb.StartPrefetcher("chain", witness)
defer statedb.StopPrefetcher()
statedb.TraceWitness(witness)
}
// Instrument the blockchain tracing
if config.EnableTracer {
if bc.logger != nil && bc.logger.OnBlockStart != nil {
@ -2222,64 +2240,10 @@ func (bc *BlockChain) ProcessBlock(ctx context.Context, parentRoot common.Hash,
}
vtime := time.Since(vstart)
// If witnesses was generated and stateless self-validation requested, do
// that now. Self validation should *never* run in production, it's more of
// a tight integration to enable running *all* consensus tests through the
// witness builder/runner, which would otherwise be impossible due to the
// various invalid chain states/behaviors being contained in those tests.
xvstart := time.Now()
if witness := statedb.Witness(); witness != nil && config.StatelessSelfValidation {
log.Warn("Running stateless self-validation", "block", block.Number(), "hash", block.Hash())
// Remove critical computed fields from the block to force true recalculation
context := block.Header()
context.Root = common.Hash{}
context.ReceiptHash = common.Hash{}
task := types.NewBlockWithHeader(context).WithBody(*block.Body())
// Run the stateless self-cross-validation
crossStateRoot, crossReceiptRoot, err := ExecuteStateless(ctx, bc.chainConfig, bc.cfg.VmConfig, task, witness)
if err != nil {
return nil, fmt.Errorf("stateless self-validation failed: %v", err)
}
if crossStateRoot != block.Root() {
return nil, fmt.Errorf("stateless self-validation root mismatch (cross: %x local: %x)", crossStateRoot, block.Root())
}
if crossReceiptRoot != block.ReceiptHash() {
return nil, fmt.Errorf("stateless self-validation receipt root mismatch (cross: %x local: %x)", crossReceiptRoot, block.ReceiptHash())
}
}
var (
xvtime = time.Since(xvstart)
proctime = time.Since(startTime) // processing + validation + cross validation
stats = &ExecuteStats{}
stats = NewExecuteStats(statedb, ptime, vtime)
)
// Update the metrics touched during block processing and validation
stats.AccountReads = statedb.AccountReads // Account reads are complete(in processing)
stats.StorageReads = statedb.StorageReads // Storage reads are complete(in processing)
stats.AccountUpdates = statedb.AccountUpdates // Account updates are complete(in validation)
stats.StorageUpdates = statedb.StorageUpdates // Storage updates are complete(in validation)
stats.AccountHashes = statedb.AccountHashes // Account hashes are complete(in validation)
stats.CodeReads = statedb.CodeReads
stats.AccountLoaded = statedb.AccountLoaded
stats.AccountUpdated = statedb.AccountUpdated
stats.AccountDeleted = statedb.AccountDeleted
stats.StorageLoaded = statedb.StorageLoaded
stats.StorageUpdated = int(statedb.StorageUpdated.Load())
stats.StorageDeleted = int(statedb.StorageDeleted.Load())
stats.CodeLoaded = statedb.CodeLoaded
stats.CodeLoadBytes = statedb.CodeLoadBytes
stats.CodeUpdated = statedb.CodeUpdated
stats.CodeUpdateBytes = statedb.CodeUpdateBytes
stats.Execution = ptime - (statedb.AccountReads + statedb.StorageReads + statedb.CodeReads) // The time spent on EVM processing
stats.Validation = vtime - (statedb.AccountHashes + statedb.AccountUpdates + statedb.StorageUpdates) // The time spent on block validation
stats.CrossValidation = xvtime // The time spent on stateless cross validation
// Write the block to the chain and get the status.
var status WriteStatus
if config.WriteState {
@ -2294,10 +2258,9 @@ func (bc *BlockChain) ProcessBlock(ctx context.Context, parentRoot common.Hash,
return nil, err
}
// Update the metrics touched during block commit
stats.AccountCommits = statedb.AccountCommits // Account commits are complete, we can mark them
stats.StorageCommits = statedb.StorageCommits // Storage commits are complete, we can mark them
stats.HasherCommit = statedb.HasherCommits // Storage commits are complete, we can mark them
stats.DatabaseCommit = statedb.DatabaseCommits // Database commits are complete, we can mark them
stats.BlockWrite = time.Since(wstart) - max(statedb.AccountCommits, statedb.StorageCommits) /* concurrent */ - statedb.DatabaseCommits
stats.BlockWrite = time.Since(wstart) - statedb.HasherCommits - statedb.DatabaseCommits
}
// Report the collected witness statistics
if witness != nil {
@ -2307,6 +2270,11 @@ func (bc *BlockChain) ProcessBlock(ctx context.Context, parentRoot common.Hash,
stats.TotalTime = elapsed
stats.MgasPerSecond = float64(res.GasUsed) * 1000 / float64(elapsed)
if config.StatelessSelfValidation {
if err := bc.crossValidation(ctx, statedb, block); err != nil {
return nil, err
}
}
return &blockProcessingResult{
usedGas: res.GasUsed,
procTime: proctime,
@ -2316,6 +2284,39 @@ func (bc *BlockChain) ProcessBlock(ctx context.Context, parentRoot common.Hash,
}, nil
}
// crossValidation runs the stateless self-validation of the given block using
// the witness held by statedb. It is a no-op if statedb carries no witness.
//
// Self validation should *never* run in production, it's more of a tight
// integration to enable running *all* consensus tests through the witness
// builder/runner, which would otherwise be impossible due to the various
// invalid chain states/behaviors being contained in those tests.
//
// An error is returned if the stateless execution fails, or if the state root
// or the receipt root it computes differs from the one recorded in the block.
// The elapsed time is reported to blockCrossValidationTimer on success only.
func (bc *BlockChain) crossValidation(ctx context.Context, statedb *state.StateDB, block *types.Block) error {
	witness := statedb.Witness()
	if witness == nil {
		return nil
	}
	xvstart := time.Now()
	log.Warn("Running stateless self-validation", "block", block.Number(), "hash", block.Hash())

	// Remove critical computed fields from the block to force true recalculation.
	// The local is deliberately not called `context`: that name would shadow
	// the imported context package for the remainder of the function.
	header := block.Header()
	header.Root = common.Hash{}
	header.ReceiptHash = common.Hash{}
	task := types.NewBlockWithHeader(header).WithBody(*block.Body())

	// Run the stateless self-cross-validation
	crossStateRoot, crossReceiptRoot, err := ExecuteStateless(ctx, bc.chainConfig, bc.cfg.VmConfig, task, witness)
	if err != nil {
		// Wrap with %w so the cause stays reachable through errors.Is/errors.As.
		return fmt.Errorf("stateless self-validation failed: %w", err)
	}
	if crossStateRoot != block.Root() {
		return fmt.Errorf("stateless self-validation root mismatch (cross: %x local: %x)", crossStateRoot, block.Root())
	}
	if crossReceiptRoot != block.ReceiptHash() {
		return fmt.Errorf("stateless self-validation receipt root mismatch (cross: %x local: %x)", crossReceiptRoot, block.ReceiptHash())
	}
	blockCrossValidationTimer.UpdateSince(xvstart)
	return nil
}
// insertSideChain is called when an import batch hits upon a pruned ancestor
// error, which happens when a sidechain with a sufficiently old fork-block is
// found.

View file

@ -424,6 +424,25 @@ func (bc *BlockChain) StateAt(root common.Hash) (*state.StateDB, error) {
return state.New(root, state.NewDatabase(bc.triedb, bc.codedb).WithSnapshot(bc.snaps))
}
// StateConfig specifies the configuration for initializing the stateDB
// returned by StateWithConfig.
type StateConfig struct {
// Prefetch enables prefetching on the state database backing the stateDB.
Prefetch bool
// PrefetchRead is the argument forwarded to EnablePrefetch; it is only
// consulted when Prefetch is set.
PrefetchRead bool
// WithSnapshot attaches the blockchain's snapshot tree to the state database.
WithSnapshot bool
}
// StateWithConfig opens a new mutable state at the given root. The backing
// state database is assembled according to config: the snapshot tree is
// attached when WithSnapshot is set, and prefetching is switched on (with the
// PrefetchRead flag passed along) when Prefetch is set.
func (bc *BlockChain) StateWithConfig(root common.Hash, config StateConfig) (*state.StateDB, error) {
	db := state.NewDatabase(bc.triedb, bc.codedb)

	// Optional snapshot acceleration.
	if config.WithSnapshot {
		db = db.WithSnapshot(bc.snaps)
	}
	// Optional prefetching.
	if config.Prefetch {
		db = db.EnablePrefetch(config.PrefetchRead)
	}
	return state.New(root, db)
}
// HistoricState returns a historic state specified by the given root.
// Live states are not available and won't be served, please use `State`
// or `StateAt` instead.

View file

@ -29,14 +29,27 @@ import (
// ExecuteStats includes all the statistics of a block execution in details.
type ExecuteStats struct {
// State read times
AccountReads time.Duration // Time spent on the account reads
StorageReads time.Duration // Time spent on the storage reads
AccountReads time.Duration // Time spent on the account reads
StorageReads time.Duration // Time spent on the storage reads
CodeReads time.Duration // Time spent on the contract code read
// State hash times
AccountHashes time.Duration // Time spent on the account trie hash
AccountUpdates time.Duration // Time spent on the account trie update
AccountCommits time.Duration // Time spent on the account trie commit
StorageUpdates time.Duration // Time spent on the storage trie update
StorageCommits time.Duration // Time spent on the storage trie commit
CodeReads time.Duration // Time spent on the contract code read
// EVM execution and validation time
Execution time.Duration // Time spent on the EVM execution
Validation time.Duration // Time spent on the block validation
// Commit times
HasherCommit time.Duration // Time spent on trie commit
DatabaseCommit time.Duration // Time spent on database commit
BlockWrite time.Duration // Time spent on block write
// Others
TotalTime time.Duration // The total time spent on block execution
MgasPerSecond float64 // The million gas processed per second
AccountLoaded int // Number of accounts loaded
AccountUpdated int // Number of accounts updated
@ -49,19 +62,39 @@ type ExecuteStats struct {
CodeUpdated int // Number of contract code written (CREATE/CREATE2 + EIP-7702)
CodeUpdateBytes int // Total bytes of code written
Execution time.Duration // Time spent on the EVM execution
Validation time.Duration // Time spent on the block validation
CrossValidation time.Duration // Optional, time spent on the block cross validation
DatabaseCommit time.Duration // Time spent on database commit
BlockWrite time.Duration // Time spent on block write
TotalTime time.Duration // The total time spent on block execution
MgasPerSecond float64 // The million gas processed per second
// Cache hit rates
StateReadCacheStats state.ReaderStats
StatePrefetchCacheStats state.ReaderStats
}
// NewExecuteStats creates the execution statistics of a block from the timers
// and counters held by stateDB.
//
// process and validation are the wall-clock durations of transaction
// processing and block validation. Execution is process minus
// stateDB.StateReadTime(), and Validation is validation minus
// stateDB.StateHashTime(). Fields not assigned here keep their zero value.
func NewExecuteStats(stateDB *state.StateDB, process time.Duration, validation time.Duration) *ExecuteStats {
	stats := new(ExecuteStats)

	// State read times
	stats.AccountReads = stateDB.AccountReads
	stats.StorageReads = stateDB.StorageReads
	stats.CodeReads = stateDB.CodeReads

	// State hash times
	stats.AccountHashes = stateDB.AccountHashes
	stats.AccountUpdates = stateDB.AccountUpdates
	stats.StorageUpdates = stateDB.StorageUpdates

	// EVM execution and validation time, net of state access and hashing
	stats.Execution = process - stateDB.StateReadTime()
	stats.Validation = validation - stateDB.StateHashTime()

	// Account, storage and code counters
	stats.AccountLoaded = stateDB.AccountLoaded
	stats.AccountUpdated = stateDB.AccountUpdated
	stats.AccountDeleted = stateDB.AccountDeleted
	stats.StorageLoaded = stateDB.StorageLoaded
	stats.StorageUpdated = int(stateDB.StorageUpdated.Load())
	stats.StorageDeleted = int(stateDB.StorageDeleted.Load())
	stats.CodeLoaded = stateDB.CodeLoaded
	stats.CodeLoadBytes = stateDB.CodeLoadBytes
	stats.CodeUpdated = stateDB.CodeUpdated
	stats.CodeUpdateBytes = stateDB.CodeUpdateBytes

	return stats
}
// reportMetrics uploads execution statistics to the metrics system.
func (s *ExecuteStats) reportMetrics() {
if s.AccountLoaded != 0 {
@ -80,12 +113,10 @@ func (s *ExecuteStats) reportMetrics() {
accountUpdateTimer.Update(s.AccountUpdates) // Account updates are complete(in validation)
storageUpdateTimer.Update(s.StorageUpdates) // Storage updates are complete(in validation)
accountHashTimer.Update(s.AccountHashes) // Account hashes are complete(in validation)
accountCommitTimer.Update(s.AccountCommits) // Account commits are complete, we can mark them
storageCommitTimer.Update(s.StorageCommits) // Storage commits are complete, we can mark them
hasherCommitTimer.Update(s.HasherCommit) // Trie commits are complete, we can mark them
blockExecutionTimer.Update(s.Execution) // The time spent on EVM processing
blockValidationTimer.Update(s.Validation) // The time spent on block validation
blockCrossValidationTimer.Update(s.CrossValidation) // The time spent on stateless cross validation
triedbCommitTimer.Update(s.DatabaseCommit) // Trie database commits are complete, we can mark them
blockWriteTimer.Update(s.BlockWrite) // The time spent on block write
blockInsertTimer.Update(s.TotalTime) // The total time spent on block execution
@ -206,7 +237,7 @@ func (s *ExecuteStats) logSlow(block *types.Block, slowBlockThreshold time.Durat
ExecutionMs: durationToMs(s.Execution),
StateReadMs: durationToMs(s.AccountReads + s.StorageReads + s.CodeReads),
StateHashMs: durationToMs(s.AccountHashes + s.AccountUpdates + s.StorageUpdates),
CommitMs: durationToMs(max(s.AccountCommits, s.StorageCommits) + s.DatabaseCommit + s.BlockWrite),
CommitMs: durationToMs(s.HasherCommit + s.DatabaseCommit + s.BlockWrite),
TotalMs: durationToMs(s.TotalTime),
},
Throughput: slowBlockThru{

View file

@ -112,6 +112,34 @@ func DeleteStorageSnapshot(db ethdb.KeyValueWriter, accountHash, storageHash com
}
}
// ReadBinTrieStem looks up the flat-state stem blob stored for the given
// 31-byte stem. Any lookup error is discarded and whatever the database
// returned is passed on, so a missing entry is expected to yield nil.
//
// The blob is the packed form of the (offset, value) pairs held at that stem
// in the binary trie; it is returned undecoded, and extracting a specific
// offset is left to the caller. See trie/bintrie and EIP-7864 for the on-trie
// layout.
func ReadBinTrieStem(db ethdb.KeyValueReader, stem []byte) []byte {
	key := binTrieStemKey(stem)
	blob, _ := db.Get(key) // the error is intentionally dropped
	return blob
}
// WriteBinTrieStem persists the flat-state stem blob under the key derived
// from the given 31-byte stem. The blob is stored as-is; producing and
// interpreting its encoding is up to the caller. A failed write is reported
// through log.Crit.
func WriteBinTrieStem(db ethdb.KeyValueWriter, stem []byte, blob []byte) {
	err := db.Put(binTrieStemKey(stem), blob)
	if err == nil {
		return
	}
	log.Crit("Failed to store bintrie stem", "err", err)
}
// DeleteBinTrieStem drops the flat-state stem blob stored under the key
// derived from the given 31-byte stem. A failed delete is reported through
// log.Crit.
func DeleteBinTrieStem(db ethdb.KeyValueWriter, stem []byte) {
	err := db.Delete(binTrieStemKey(stem))
	if err == nil {
		return
	}
	log.Crit("Failed to delete bintrie stem", "err", err)
}
// IterateStorageSnapshots returns an iterator for walking the entire storage
// space of a specific account.
func IterateStorageSnapshots(db ethdb.Iteratee, accountHash common.Hash) ethdb.Iterator {

View file

@ -126,6 +126,20 @@ var (
TrieNodeStoragePrefix = []byte("O") // TrieNodeStoragePrefix + accountHash + hexPath -> trie node
stateIDPrefix = []byte("L") // stateIDPrefix + state root -> state id
// Binary-trie flat-state scheme. A stem is 31 bytes per EIP-7864 (the
// common prefix of the 32-byte tree key); the stored value is a packed
// blob containing the subset of 256 offset values that are populated
// for this stem (layout: 32-byte bitmap of present offsets, followed
// by N 32-byte values in offset order).
//
// Note: bintrie pathdb wraps the disk database in a table keyed by
// VerklePrefix ("v"), so this prefix is effectively nested inside "v"
// when used by pathdb. It is nonetheless defined as a distinct
// top-level byte ("X") to prevent accidental collisions with other
// top-level namespaces (e.g. blockBodyPrefix "b") if the codec is ever
// used against an unwrapped database.
BinTrieStemPrefix = []byte("X") // BinTrieStemPrefix + stem(31B) -> stem blob
// State history indexing within path-based storage scheme
StateHistoryIndexPrefix = []byte("m") // The global prefix of state history index data
StateHistoryAccountMetadataPrefix = []byte("ma") // StateHistoryAccountMetadataPrefix + account address hash => account metadata
@ -297,6 +311,22 @@ func storageTrieNodeKey(accountHash common.Hash, path []byte) []byte {
return buf
}
// binTrieStemKey = BinTrieStemPrefix + stem (31 bytes).
//
// The stem is the 31-byte prefix shared by all 32-byte tree keys of one group
// (see EIP-7864). The blob stored under the resulting key packs the
// (offset, value) pairs of that stem: BasicData (offset 0), CodeHash
// (offset 1), header storage (offsets 64-127), code chunks (offsets 128-255)
// and main-storage slots are all extracted from it.
func binTrieStemKey(stem []byte) []byte {
	// Callers are expected to pass a 31-byte stem; the length is not checked.
	// The key lives in a freshly allocated, exactly sized buffer, so it never
	// shares backing storage with the prefix or the caller's stem.
	key := make([]byte, 0, len(BinTrieStemPrefix)+len(stem))
	key = append(key, BinTrieStemPrefix...)
	return append(key, stem...)
}
// IsLegacyTrieNode reports whether a provided database entry is a legacy trie
// node. The characteristics of legacy trie node are:
// - the key length is 32 bytes

View file

@ -17,8 +17,6 @@
package state
import (
"fmt"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/overlay"
"github.com/ethereum/go-ethereum/core/rawdb"
@ -29,7 +27,6 @@ import (
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/trie"
"github.com/ethereum/go-ethereum/trie/bintrie"
"github.com/ethereum/go-ethereum/trie/transitiontrie"
"github.com/ethereum/go-ethereum/trie/trienode"
"github.com/ethereum/go-ethereum/triedb"
)
@ -43,6 +40,9 @@ type Database interface {
// through which the account iterator and storage iterator can be created.
Iteratee(root common.Hash) (Iteratee, error)
// Hasher returns a state hasher associated with the specified state root.
Hasher(root common.Hash) (Hasher, error)
// OpenTrie opens the main account trie.
OpenTrie(root common.Hash) (Trie, error)
@ -150,6 +150,9 @@ type CachingDB struct {
triedb *triedb.Database
codedb *CodeDB
snap *snapshot.Tree
prefetch bool
prefetchRead bool
}
// NewDatabase creates a state database with the provided data sources.
@ -177,6 +180,13 @@ func (db *CachingDB) WithSnapshot(snapshot *snapshot.Tree) *CachingDB {
return db
}
// EnablePrefetch switches on the hasher prefetching feature and records the
// given prefetchRead flag alongside it. The receiver is modified in place and
// returned so that calls can be chained.
func (db *CachingDB) EnablePrefetch(prefetchRead bool) *CachingDB {
	db.prefetch, db.prefetchRead = true, prefetchRead
	return db
}
// StateReader returns a state reader associated with the specified state root.
func (db *CachingDB) StateReader(stateRoot common.Hash) (StateReader, error) {
var readers []StateReader
@ -194,10 +204,25 @@ func (db *CachingDB) StateReader(stateRoot common.Hash) (StateReader, error) {
// This reader offers improved performance but is optional and only
// partially useful if the snapshot data in path database is not
// fully generated.
//
// For binary-trie databases the reader needs codec-specific key
// derivation (EIP-7864 stem || offset) and a separate decode path
// (BasicData/CodeHash leaves rather than slim RLP), so we install
// a bintrieFlatReader instead of the historical merkle flatReader.
// If the underlying path-database reader can't expose raw-byte
// access — e.g. a hypothetical wrapper that only implements the
// minimal database.StateReader — we silently fall through to the
// trie reader, which always works.
if db.TrieDB().Scheme() == rawdb.PathScheme {
reader, err := db.triedb.StateReader(stateRoot)
if err == nil {
readers = append(readers, newFlatReader(reader))
if db.TrieDB().IsVerkle() {
if br := newBintrieFlatReader(reader); br != nil {
readers = append(readers, br)
}
} else {
readers = append(readers, newFlatReader(reader))
}
}
}
// Configure the trie reader, which is expected to be available as the
@ -221,6 +246,15 @@ func (db *CachingDB) Reader(stateRoot common.Hash) (Reader, error) {
return newReader(db.codedb.Reader(), sr), nil
}
// Hasher implements Database. It constructs the hasher for the given state
// root: a binary hasher when the trie database reports IsVerkle, a merkle
// hasher otherwise. Both are handed the database's prefetch settings.
func (db *CachingDB) Hasher(stateRoot common.Hash) (Hasher, error) {
	if !db.TrieDB().IsVerkle() {
		return newMerkleHasher(stateRoot, db.triedb, db.prefetch, db.prefetchRead)
	}
	return newBinaryHasher(stateRoot, db.triedb, db.prefetch, db.prefetchRead)
}
// ReadersWithCacheStats creates a pair of state readers that share the same
// underlying state reader and internal state cache, while maintaining separate
// statistics respectively.
@ -297,7 +331,11 @@ func (db *CachingDB) Commit(update *stateUpdate) error {
}
// If snapshotting is enabled, update the snapshot tree with this new version
if db.snap != nil && db.snap.Snapshot(update.originRoot) != nil {
if err := db.snap.Update(update.root, update.originRoot, update.accounts, update.storages); err != nil {
accounts, _, storages, _, err := update.encodeMerkle()
if err != nil {
return err
}
if err := db.snap.Update(update.root, update.originRoot, accounts, storages); err != nil {
log.Warn("Failed to update snapshot tree", "from", update.originRoot, "to", update.root, "err", err)
}
// Keep 128 diff layers in the memory, persistent layer is 129th.
@ -308,7 +346,11 @@ func (db *CachingDB) Commit(update *stateUpdate) error {
log.Warn("Failed to cap snapshot tree", "root", update.root, "layers", TriesInMemory, "err", err)
}
}
return db.triedb.Update(update.root, update.originRoot, update.blockNumber, update.nodes, update.stateSet())
stateSet, err := update.stateSet(!db.TrieDB().IsVerkle())
if err != nil {
return err
}
return db.triedb.Update(update.root, update.originRoot, update.blockNumber, update.nodes, stateSet)
}
// Iteratee returns a state iteratee associated with the specified state root,
@ -316,15 +358,3 @@ func (db *CachingDB) Commit(update *stateUpdate) error {
func (db *CachingDB) Iteratee(root common.Hash) (Iteratee, error) {
	// The first argument is the negation of the trie database's IsVerkle flag.
	notVerkle := !db.triedb.IsVerkle()
	return newStateIteratee(notVerkle, root, db.triedb, db.snap)
}
// mustCopyTrie returns the result of Copy for the supported trie
// implementations (*trie.StateTrie and *transitiontrie.TransitionTrie) and
// panics for any other type.
func mustCopyTrie(t Trie) Trie {
	if st, ok := t.(*trie.StateTrie); ok {
		return st.Copy()
	}
	if tt, ok := t.(*transitiontrie.TransitionTrie); ok {
		return tt.Copy()
	}
	panic(fmt.Errorf("unknown trie type %T", t))
}

View file

@ -0,0 +1,202 @@
// Copyright 2026 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package state
import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/stateless"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/trie/trienode"
)
// CodeMut represents a mutation to contract code. It is referenced from
// AccountMut, where a nil *CodeMut means the code is left unchanged.
type CodeMut struct {
Code []byte // New contract code; null for deletion
}
// AccountMut represents a mutation to an account, as passed to
// Hasher.UpdateAccount.
//
// Semantics:
//   - Account == nil: delete the account
//   - Code == nil: leave code unchanged
//   - Code != nil: apply the given code mutation
//   - CodeSize: the account's CURRENT total code size, not just the bytes
//     carried in Code. It is used by implementations that pack the code
//     size into their on-trie account encoding (e.g. the binary trie
//     BasicData leaf). Callers must always populate this field to the
//     account's real code size, obtained via stateObject.CodeSize() or an
//     equivalent source — even on balance/nonce-only updates where the
//     code bytes themselves are not loaded. Leaving it at zero on a
//     non-code-touching update silently corrupts on-trie state for any
//     hasher that stores code size.
type AccountMut struct {
Account *Account // Null for deletion
Code *CodeMut // Null for unchanged
CodeSize int // Current code length (must be set by the caller)
}
// Hashes encapsulates a trie root together with its original (pre-update) root.
// Hasher.Commit returns one entry of this type per mutated account for
// hashers that use a two-layer structure.
type Hashes struct {
Hash common.Hash // Post-mutation root
Prev common.Hash // Pre-mutation root
}
// StemWrite describes a single write to a bintrie stem offset. It is used
// by LeafProducer-capable hashers to report flat-state mutations derived
// from their trie updates so a downstream flat-state layer can be kept
// consistent with the hasher's on-trie view.
//
// Stem is the 31-byte common prefix of the EIP-7864 tree key. Offset is
// the index into the stem's 256-value group (0..255). Value is the
// 32-byte leaf value that was written. Writes follow the per-operation
// policy documented on the binary hasher:
//   - Account create/update: two writes (BasicData, CodeHash) with
//     non-nil 32-byte values.
//   - Storage update to a non-zero value: one write with the 32-byte
//     normalized value.
//   - Storage update to zero (the bintrie's "delete" convention): one
//     write with 32 zero bytes (tombstone / present with zero).
//   - Account delete: two writes with nil values, signalling the flat
//     state to clear the corresponding offsets.
type StemWrite struct {
// Stem is the 31-byte stem the write belongs to.
Stem [31]byte
// Offset selects the value within the stem's group.
Offset byte
// Value is the written leaf value; nil requests clearing the offset.
Value []byte
}
// LeafProducer is an optional extension to Hasher for implementations
// that track flat-state mutations alongside trie updates. Callers use it
// to harvest the set of stem writes needed to keep an out-of-band flat
// state layer consistent with the hasher's trie mutations.
//
// The binary hasher implements this interface; the merkle hasher does
// not, because merkle flat state is MPT-shaped and does not use stems.
// Callers check via a type assertion:
//
//	if lp, ok := h.(LeafProducer); ok {
//		writes := lp.DrainStemWrites()
//		// ... propagate writes into the state update ...
//	}
//
// DrainStemWrites is intended to be called ONCE per block, AFTER all
// UpdateAccount/UpdateStorage calls for that block have completed. The
// implementation must reset its internal buffer on drain so subsequent
// calls return only writes accumulated since the last drain.
type LeafProducer interface {
// DrainStemWrites returns all stem writes accumulated since the last
// drain, in the order they were produced, and resets the internal
// buffer. The returned slice is owned by the caller; the hasher
// allocates a fresh slice on the next update.
DrainStemWrites() []StemWrite
}
// Hasher defines the minimal interface for computing state root hashes.
//
// It abstracts over different trie implementations, such as the traditional
// two-layer Merkle Patricia Trie (separate account and storage tries) and a
// unified single-layer binary trie (a single trie covering accounts, storages
// and contract code).
//
// This abstraction also enables alternative implementations, such as a no-op
// hasher for flat-state-only nodes (i.e. nodes that do not store trie data and
// do not perform state validation).
//
// The Hash method may be invoked multiple times and must return a hash that
// reflects all preceding state mutations. This behavior is required for
// compatibility with pre-Byzantium semantics.
type Hasher interface {
// UpdateAccount writes a list of accounts into the state.
//
// NOTE(review): addresses and accounts are presumably parallel slices, with
// accounts[i] describing the mutation of addresses[i] — confirm against the
// implementations.
UpdateAccount(addresses []common.Address, accounts []AccountMut) error
// UpdateStorage writes a list of storage slot values for the given account.
//
// NOTE(review): keys and values are presumably parallel slices — confirm
// against the implementations.
UpdateStorage(address common.Address, keys []common.Hash, values []common.Hash) error
// Hash computes and returns the state root hash without committing.
Hash() common.Hash
// Commit finalizes all pending changes and returns the resulting state root
// hash, along with the set of dirty trie nodes generated by the updates.
//
// Additionally, if the hasher uses a two-layer structure, the roots of the
// secondary tries together with their original hashes will also be returned
// for all mutated accounts, regardless of whether their storage was modified.
Commit() (common.Hash, *trienode.MergedNodeSet, map[common.Address]Hashes, error)
// Copy returns a deep-copied hasher instance.
Copy() Hasher
}
// Prefetcher is an optional extension implemented by hashers that can
// asynchronously warm up trie/state data ahead of hashing.
type Prefetcher interface {
// PrefetchAccount schedules the given accounts for prefetching.
//
// NOTE(review): read presumably marks the request as originating from a
// state read rather than a write — confirm against the implementations.
PrefetchAccount(addresses []common.Address, read bool)
// PrefetchStorage schedules the given storage slots of the account for
// prefetching. The read flag is presumably interpreted as in
// PrefetchAccount — confirm against the implementations.
PrefetchStorage(addr common.Address, keys []common.Hash, read bool)
// TermPrefetch terminates all the background prefetching activities.
TermPrefetch()
}
// WitnessCollector is an optional extension implemented by hashers that can
// construct a state witness for the most recent committed state transition.
type WitnessCollector interface {
// CollectWitness takes the witness to work on as its argument and has no
// return value.
//
// NOTE(review): presumably the state witness corresponding to the most
// recent committed state transition is gathered into the supplied witness —
// confirm against the implementations.
CollectWitness(*stateless.Witness)
}
// Prover is an optional extension implemented by hashers that can construct
// proofs against the current state. Proof data is delivered through the
// proofDb writer argument; the methods themselves only return an error.
type Prover interface {
// ProveAccount constructs a proof for the given account.
//
// The returned proof contains all encoded nodes on the path to the account.
// The account itself is included in the last node and can be retrieved by
// verifying the proof.
//
// If the account does not exist, the returned proof contains all nodes of
// the longest existing prefix of the account key (at least the root), ending
// with the node that proves the absence of the account.
ProveAccount(addr common.Address, proofDb ethdb.KeyValueWriter) error
// ProveStorage constructs a proof for the given storage slot of the
// specified account.
//
// The returned proof contains all encoded nodes on the path to the storage
// slot. The slot value itself is included in the last node and can be
// retrieved by verifying the proof.
//
// If the account or storage slot does not exist, the returned proof contains
// the nodes required to prove its absence.
ProveStorage(addr common.Address, key common.Hash, proofDb ethdb.KeyValueWriter) error
}
// noopHasher is a Hasher that ignores every mutation and always reports the
// zero hash as the state root.
type noopHasher struct{}

// UpdateAccount discards the supplied account mutations.
func (*noopHasher) UpdateAccount(_ []common.Address, _ []AccountMut) error {
	return nil
}

// UpdateStorage discards the supplied storage mutations.
func (*noopHasher) UpdateStorage(_ common.Address, _ []common.Hash, _ []common.Hash) error {
	return nil
}

// Hash returns the zero hash.
func (*noopHasher) Hash() common.Hash {
	var root common.Hash
	return root
}

// Commit returns the zero hash together with an empty node set and an empty
// (non-nil) secondary-hash map.
func (*noopHasher) Commit() (common.Hash, *trienode.MergedNodeSet, map[common.Address]Hashes, error) {
	var (
		root   common.Hash
		nodes  = trienode.NewMergedNodeSet()
		hashes = map[common.Address]Hashes{}
	)
	return root, nodes, hashes, nil
}

// Copy returns a fresh no-op hasher; there is no state to duplicate.
func (*noopHasher) Copy() Hasher { return new(noopHasher) }

// Close does nothing.
func (*noopHasher) Close() {}

View file

@ -0,0 +1,397 @@
// Copyright 2026 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package state
import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/stateless"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/trie/bintrie"
"github.com/ethereum/go-ethereum/trie/trienode"
"github.com/ethereum/go-ethereum/triedb"
)
// wrapBinTrie pairs a BinaryTrie with an optional background prefetcher that
// preloads trie nodes ahead of mutation.
type wrapBinTrie struct {
// BinaryTrie is embedded, so every trie method that wrapBinTrie does not
// shadow is promoted and reaches the underlying trie directly.
*bintrie.BinaryTrie
// prefetcher is nil when prefetching is disabled, and is reset to nil by
// term once the prefetcher has been terminated.
prefetcher *prefetcher
}
// newWrapBinTrie opens the binary trie rooted at root from db and wraps it.
// A background prefetcher is attached only when prefetch is set; prefetchRead
// is handed to the prefetcher constructor unchanged.
func newWrapBinTrie(root common.Hash, db *triedb.Database, prefetch bool, prefetchRead bool) (*wrapBinTrie, error) {
	inner, err := bintrie.NewBinaryTrie(root, db)
	if err != nil {
		return nil, err
	}
	wrapped := &wrapBinTrie{BinaryTrie: inner}
	if prefetch {
		wrapped.prefetcher = newPrefetcher(inner, prefetchRead)
	}
	return wrapped, nil
}
// term stops the prefetcher, if one is still attached, and then drops the
// reference to it. Once that has happened every later call reduces to a
// single nil check, so calling term repeatedly is cheap and harmless.
func (tr *wrapBinTrie) term() {
	if p := tr.prefetcher; p != nil {
		p.terminate()
		tr.prefetcher = nil
	}
}
// The methods below shadow the embedded bintrie.BinaryTrie so that any direct
// trie mutation, hashing, commit, proof or witness access auto-terminates the
// prefetcher first. This makes data-race freedom structural: callers never
// need to remember to call term() manually.
//
// Every mutating trie method used by binaryHasher must have a shadow here;
// a method that is merely promoted from the embedded trie bypasses term().

// UpdateAccount terminates the prefetcher and writes the account into the trie.
func (tr *wrapBinTrie) UpdateAccount(address common.Address, acc *types.StateAccount, codeLen int) error {
	tr.term()
	return tr.BinaryTrie.UpdateAccount(address, acc, codeLen)
}

// UpdateContractCode terminates the prefetcher and writes the chunked contract
// code into the trie. Without this shadow the promoted BinaryTrie method would
// be invoked directly, silently skipping the prefetcher termination.
func (tr *wrapBinTrie) UpdateContractCode(address common.Address, codeHash common.Hash, code []byte) error {
	tr.term()
	return tr.BinaryTrie.UpdateContractCode(address, codeHash, code)
}

// DeleteAccount terminates the prefetcher and deletes the account from the trie.
func (tr *wrapBinTrie) DeleteAccount(address common.Address) error {
	tr.term()
	return tr.BinaryTrie.DeleteAccount(address)
}

// UpdateStorage terminates the prefetcher and writes the storage slot.
func (tr *wrapBinTrie) UpdateStorage(address common.Address, key, value []byte) error {
	tr.term()
	return tr.BinaryTrie.UpdateStorage(address, key, value)
}

// DeleteStorage terminates the prefetcher and deletes the storage slot.
func (tr *wrapBinTrie) DeleteStorage(address common.Address, key []byte) error {
	tr.term()
	return tr.BinaryTrie.DeleteStorage(address, key)
}

// Hash terminates the prefetcher and returns the trie root hash.
func (tr *wrapBinTrie) Hash() common.Hash {
	tr.term()
	return tr.BinaryTrie.Hash()
}

// Commit terminates the prefetcher and commits the trie, returning the root
// hash and the node set produced by the underlying trie.
func (tr *wrapBinTrie) Commit(collectLeaf bool) (common.Hash, *trienode.NodeSet) {
	tr.term()
	return tr.BinaryTrie.Commit(collectLeaf)
}

// Prove terminates the prefetcher and delegates proof construction to the
// underlying trie.
func (tr *wrapBinTrie) Prove(key []byte, proofDb ethdb.KeyValueWriter) error {
	tr.term()
	return tr.BinaryTrie.Prove(key, proofDb)
}

// Witness terminates the prefetcher and returns the underlying trie's witness.
func (tr *wrapBinTrie) Witness() map[string][]byte {
	tr.term()
	return tr.BinaryTrie.Witness()
}
// prefetchAccounts hands the addresses to the prefetcher for scheduling. It
// does nothing when no prefetcher is attached (disabled or already terminated).
func (tr *wrapBinTrie) prefetchAccounts(addresses []common.Address, read bool) {
	if p := tr.prefetcher; p != nil {
		p.scheduleAccounts(addresses, read)
	}
}
// prefetchStorage hands the storage slots of addr to the prefetcher for
// scheduling. It does nothing when no prefetcher is attached (disabled or
// already terminated).
func (tr *wrapBinTrie) prefetchStorage(addr common.Address, keys []common.Hash, read bool) {
	if p := tr.prefetcher; p != nil {
		p.scheduleSlots(addr, keys, read)
	}
}
// copy duplicates the wrapped trie. The receiver's prefetcher is terminated
// first, and the duplicate is created without one: a prefetcher belongs only
// to the trie it was started for.
func (tr *wrapBinTrie) copy() *wrapBinTrie {
	tr.term()
	dup := new(wrapBinTrie)
	dup.BinaryTrie = tr.BinaryTrie.Copy()
	return dup
}
// binaryHasher is a Hasher implementation backed by a unified single-layer
// binary trie. Accounts, storage slots, and contract code all reside in one
// trie, keyed according to the EIP-7864 address space layout.
//
// binaryHasher also implements LeafProducer: alongside every trie mutation
// it records the corresponding (stem, offset, value) write into an
// internal buffer. StateDB.commit() drains this buffer once per block
// via LeafProducer.DrainStemWrites and hands the writes to the pathdb
// flat-state layer via stateUpdate.encodeBinary, keeping the bintrie
// trie and its flat-state mirror consistent without recomputing the
// bintrie key derivation twice.
type binaryHasher struct {
// db is the trie database the trie was opened from.
db *triedb.Database
// root is the state root the hasher was opened at. It is set by the
// constructor and carried over by Copy; nothing in this file updates it
// after Commit.
root common.Hash
// prefetch gates PrefetchAccount/PrefetchStorage: when false both are
// no-ops. Copy always clears it on the duplicate.
prefetch bool
// trie is the wrapped binary trie holding accounts, storage and code.
trie *wrapBinTrie
// leaves buffers flat-state writes produced as a side-effect of
// UpdateAccount/UpdateStorage/deleteAccount. It is cleared by
// DrainStemWrites. Direct reads and writes to this slice are only
// safe from the single goroutine that owns the hasher; the Hasher
// interface already requires single-threaded use per block.
leaves []StemWrite
}
// Compile-time assertion that binaryHasher implements LeafProducer.
var _ LeafProducer = (*binaryHasher)(nil)
// newBinaryHasher opens the binary trie at root from db and returns a hasher
// on top of it. prefetch enables the background prefetcher; prefetchRead is
// forwarded to the trie wrapper. The stem-write buffer starts out empty.
func newBinaryHasher(root common.Hash, db *triedb.Database, prefetch bool, prefetchRead bool) (*binaryHasher, error) {
	wrapped, err := newWrapBinTrie(root, db, prefetch, prefetchRead)
	if err != nil {
		return nil, err
	}
	hasher := &binaryHasher{trie: wrapped, prefetch: prefetch, root: root, db: db}
	return hasher, nil
}
// DrainStemWrites implements LeafProducer. It hands back every stem write
// buffered since the previous drain and leaves the hasher with an empty
// buffer. Ownership of the returned slice passes to the caller; the next
// recorded write allocates a new backing array.
func (h *binaryHasher) DrainStemWrites() []StemWrite {
	var drained []StemWrite
	drained, h.leaves = h.leaves, nil
	return drained
}
// recordLeaf buffers one stem write. fullKey is a 32-byte tree key: its first
// bintrie.StemSize bytes become the stem and the byte that follows becomes the
// offset. A nil value is kept as nil (the flat-state "clear" marker used for
// account deletion); any other value is copied so the buffer never aliases
// the caller's memory.
func (h *binaryHasher) recordLeaf(fullKey []byte, value []byte) {
	stem, offset := fullKey[:bintrie.StemSize], fullKey[bintrie.StemSize]

	entry := StemWrite{Offset: offset}
	copy(entry.Stem[:], stem)
	if value != nil {
		entry.Value = append(make([]byte, 0, len(value)), value...)
	}
	h.leaves = append(h.leaves, entry)
}
// deleteAccount removes the account at addr from the state.
//
// Besides the trie mutation it buffers two "clear" stem writes (nil value):
// one for the BasicData leaf and one for the CodeHash leaf, so the flat-state
// mirror can drop the matching entries.
//
// Note: BinaryTrie.DeleteAccount is currently a no-op upstream (tracked as a
// standalone bugfix PR against ethereum/go-ethereum). Until that lands the
// trie keeps the account while the flat-state mirror drops it, a temporary
// inconsistency limited to the account-delete path.
//
// Storage slots and code chunks are NOT touched here; callers that need a
// full account wipe must walk storage explicitly. Pre-EIP-6780 self-destruct
// wipe is a documented scope limitation.
func (h *binaryHasher) deleteAccount(addr common.Address) error {
	// The clears are buffered BEFORE the trie call on purpose: the buffer
	// should reflect the intended write even if the trie layer errors and
	// things have to be rolled back.
	clears := [][]byte{
		bintrie.GetBinaryTreeKeyBasicData(addr),
		bintrie.GetBinaryTreeKeyCodeHash(addr),
	}
	for _, key := range clears {
		h.recordLeaf(key, nil) // nil → clear the flat-state offset
	}
	return h.trie.DeleteAccount(addr)
}
// updateAccount writes the account at addr into the state and buffers the
// matching flat-state writes.
//
// The code size comes from AccountMut.CodeSize, which the caller
// (StateDB.IntermediateRoot) fills in via stateObject.CodeSize(). Per EIP-7864
// code_size is packed into the BasicData leaf (bytes 5-7) and is
// consensus-critical. BinaryTrie.UpdateAccount rewrites the whole BasicData
// blob on every call, so a wrong codeLen would silently overwrite the stored
// code_size. This matters for balance/nonce-only updates, where account.Code
// is nil yet the account may still carry code: the caller must supply the
// real size, which it obtains from the stateObject (falling back to a reader
// code-size lookup when the bytes are not loaded).
func (h *binaryHasher) updateAccount(addr common.Address, account AccountMut) error {
	var (
		src  = account.Account
		size = account.CodeSize
		data = &types.StateAccount{Nonce: src.Nonce, Balance: src.Balance, CodeHash: src.CodeHash}
	)
	if err := h.trie.UpdateAccount(addr, data, size); err != nil {
		return err
	}
	// Mirror the two account leaves into the flat state. PackBasicData yields
	// the same 32-byte blob the trie layer packs internally, so both sides
	// encode bit-identically.
	packed := bintrie.PackBasicData(data.Nonce, data.Balance, size)
	h.recordLeaf(bintrie.GetBinaryTreeKeyBasicData(addr), packed[:])

	// The code hash goes straight into its own leaf. EOAs carry
	// types.EmptyCodeHash (a known non-zero hash), so this leaf is always
	// populated after a non-delete update.
	h.recordLeaf(bintrie.GetBinaryTreeKeyCodeHash(addr), data.CodeHash)

	// Chunked code is only written when the mutation carries dirty code.
	if account.Code == nil || len(account.Code.Code) == 0 {
		return nil
	}
	return h.trie.UpdateContractCode(addr, common.BytesToHash(src.CodeHash), account.Code.Code)
}
// UpdateAccount implements Hasher. It applies the account mutations in order,
// pairing addresses[i] with accounts[i]: a mutation whose Account is nil
// deletes the account, anything else updates it. Processing stops at the
// first error. All storage changes are assumed to have been written already.
func (h *binaryHasher) UpdateAccount(addresses []common.Address, accounts []AccountMut) error {
	for idx := range addresses {
		mut := accounts[idx]
		if mut.Account != nil {
			if err := h.updateAccount(addresses[idx], mut); err != nil {
				return err
			}
			continue
		}
		if err := h.deleteAccount(addresses[idx]); err != nil {
			return err
		}
	}
	return nil
}
// UpdateStorage implements Hasher. It applies the storage mutations in order,
// pairing keys[i] with values[i], and must run before the owning account's
// metadata is written.
//
// Each mutation is mirrored as a flat-state stem write. A zero value is the
// bintrie "delete" convention: the slot is deleted in the trie and the mirror
// records 32 zero bytes (a present-with-zero tombstone) instead of clearing
// the offset, which keeps the two views bit-identical for the slot.
func (h *binaryHasher) UpdateStorage(address common.Address, keys []common.Hash, values []common.Hash) error {
	for idx := range keys {
		var (
			slot  = keys[idx]
			value [bintrie.HashSize]byte // stays all-zero for deletions
			err   error
		)
		if values[idx] != (common.Hash{}) {
			// A common.Hash is already 32 bytes wide, so the trie's
			// right-justification of shorter inputs is a no-op here.
			copy(value[:], values[idx][:])
			err = h.trie.UpdateStorage(address, slot[:], value[:])
		} else {
			err = h.trie.DeleteStorage(address, slot[:])
		}
		if err != nil {
			return err
		}
		// Mirror the write in both cases: the tombstone for a delete, the
		// actual value otherwise.
		h.recordLeaf(bintrie.GetBinaryTreeKeyStorageSlot(address, slot[:]), value[:])
	}
	return nil
}
// Hash implements Hasher, computing the state root hash without committing.
// The call goes through wrapBinTrie.Hash, so a running prefetcher is
// terminated before the trie is hashed.
func (h *binaryHasher) Hash() common.Hash {
return h.trie.Hash()
}
// Commit implements Hasher. It commits the trie and returns the resulting
// root together with the dirty trie nodes wrapped in a merged node set.
//
// The secondary-hash map is always nil: the binary trie is one unified
// structure with no per-account storage sub-tries to report on.
func (h *binaryHasher) Commit() (common.Hash, *trienode.MergedNodeSet, map[common.Address]Hashes, error) {
	root, dirty := h.trie.Commit(false)
	merged := trienode.NewMergedNodeSet()
	if dirty == nil {
		return root, merged, nil, nil
	}
	if err := merged.Merge(dirty); err != nil {
		return common.Hash{}, nil, nil, err
	}
	return root, merged, nil, nil
}
// Copy implements Hasher, returning a deep-copied hasher instance.
//
// The copy owns an independent trie and an independent duplicate of the
// not-yet-drained stem-write buffer. Carrying the buffer over matters: the
// copied trie already contains every mutation applied so far, so a copy with
// an empty buffer would commit a trie whose flat-state mirror is missing
// those writes.
//
// Prefetching is never enabled on the copy, and copying terminates the
// receiver's prefetcher as a side effect (see wrapBinTrie.copy).
func (h *binaryHasher) Copy() Hasher {
	cpy := &binaryHasher{
		db:       h.db,
		root:     h.root,
		prefetch: false,
		trie:     h.trie.copy(),
	}
	if len(h.leaves) > 0 {
		cpy.leaves = make([]StemWrite, len(h.leaves))
		for i, w := range h.leaves {
			// w is a by-value copy of the element; only the Value slice
			// still aliases the original and needs detaching. nil is kept
			// as nil because it is the "clear" marker.
			if w.Value != nil {
				w.Value = append(make([]byte, 0, len(w.Value)), w.Value...)
			}
			cpy.leaves[i] = w
		}
	}
	return cpy
}
// ProveAccount implements Prover.
//
// NOTE: not usable yet. BinaryTrie.Prove is not yet implemented (panics at
// runtime), and the key passed below is keccak256(addr) where a bintrie tree
// key is required. Do not call until the bintrie proof path is implemented.
func (h *binaryHasher) ProveAccount(addr common.Address, proofDb ethdb.KeyValueWriter) error {
return h.trie.Prove(crypto.Keccak256(addr.Bytes()), proofDb)
}
// ProveStorage implements Prover.
//
// NOTE: same limitation as ProveAccount — BinaryTrie.Prove panics and the key
// derivation is wrong. In particular the addr argument is currently unused:
// the key handed to the trie is keccak256(key) only.
func (h *binaryHasher) ProveStorage(addr common.Address, key common.Hash, proofDb ethdb.KeyValueWriter) error {
return h.trie.Prove(crypto.Keccak256(key.Bytes()), proofDb)
}
// CollectWitness implements WitnessCollector. It aggregates all trie nodes
// accessed during the state transition from the unified binary trie into
// a single state witness.
//
// The nodes are added to the supplied witness via AddState, with the zero
// hash as the second argument. Fetching the trie witness goes through
// wrapBinTrie.Witness, which terminates a running prefetcher first.
func (h *binaryHasher) CollectWitness(witness *stateless.Witness) {
witness.AddState(h.trie.Witness(), common.Hash{})
}
// PrefetchAccount implements Prefetcher. When prefetching is enabled on this
// hasher the addresses are forwarded to the trie's prefetcher; otherwise the
// call is a no-op.
func (h *binaryHasher) PrefetchAccount(addresses []common.Address, read bool) {
	if h.prefetch {
		h.trie.prefetchAccounts(addresses, read)
	}
}
// PrefetchStorage implements Prefetcher. When prefetching is enabled on this
// hasher the storage slots of addr are forwarded to the trie's prefetcher for
// background loading in the unified binary trie; otherwise the call is a no-op.
func (h *binaryHasher) PrefetchStorage(addr common.Address, keys []common.Hash, read bool) {
	if h.prefetch {
		h.trie.prefetchStorage(addr, keys, read)
	}
}
// TermPrefetch stops the background prefetcher attached to the hasher's trie.
// It tolerates a nil receiver and may be called any number of times.
func (h *binaryHasher) TermPrefetch() {
	if h != nil {
		h.trie.term()
	}
}

View file

@ -0,0 +1,548 @@
// Copyright 2026 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package state
import (
"bytes"
"testing"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/stateless"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/trie/bintrie"
"github.com/ethereum/go-ethereum/triedb"
)
// newTestBinaryHasher opens a binaryHasher at root on top of db using the
// prefetch settings in cfg. The test fails immediately if the hasher cannot
// be created, and the prefetcher is terminated when the test finishes.
func newTestBinaryHasher(t *testing.T, db *triedb.Database, root common.Hash, cfg hasherTestConfig) *binaryHasher {
	t.Helper()

	hasher, err := newBinaryHasher(root, db, cfg.prefetch, cfg.prefetchRead)
	if err != nil {
		t.Fatal(err)
	}
	t.Cleanup(hasher.TermPrefetch)
	return hasher
}
// commitAndReopenBinary simulates a block boundary: it commits h, pushes the
// resulting nodes into the trie database, flushes them, and returns a new
// hasher opened at the committed root with the prefetch settings in cfg.
func commitAndReopenBinary(t *testing.T, h *binaryHasher, cfg hasherTestConfig) *binaryHasher {
	t.Helper()

	root, nodes, _, err := h.Commit()
	if err != nil {
		t.Fatal(err)
	}
	if nodes != nil {
		err = h.db.Update(root, h.root, 0, nodes, triedb.NewStateSet())
		if err != nil {
			t.Fatal(err)
		}
		err = h.db.Commit(root, false)
		if err != nil {
			t.Fatal(err)
		}
	}
	reopened, err := newBinaryHasher(root, h.db, cfg.prefetch, cfg.prefetchRead)
	if err != nil {
		t.Fatal(err)
	}
	t.Cleanup(reopened.TermPrefetch)
	return reopened
}
// makeBinaryBaseState builds the non-empty starting state shared by the tests:
//   - addr1: nonce=1, balance=100, storage={slot1: val1, slot2: val2}
//   - addr2: nonce=2, balance=200, no storage
//
// The base is written without prefetching, then committed and flushed, so the
// returned hasher (opened with cfg) reads it back from the database.
func makeBinaryBaseState(t *testing.T, cfg hasherTestConfig) *binaryHasher {
	t.Helper()

	var (
		db    = triedb.NewDatabase(rawdb.NewMemoryDatabase(), triedb.VerkleDefaults)
		base  = newTestBinaryHasher(t, db, types.EmptyBinaryHash, hasherTestConfig{name: "base"})
		slots = []common.Hash{hasherSlot1, hasherSlot2}
		vals  = []common.Hash{hasherVal1, hasherVal2}
	)
	if err := base.UpdateStorage(hasherAddr1, slots, vals); err != nil {
		t.Fatal(err)
	}
	addrs := []common.Address{hasherAddr1, hasherAddr2}
	muts := []AccountMut{hasherAccount(1, 100), hasherAccount(2, 200)}
	if err := base.UpdateAccount(addrs, muts); err != nil {
		t.Fatal(err)
	}
	return commitAndReopenBinary(t, base, cfg)
}
// TestBinaryHasherBasic verifies that mutating storage and accounts on top of
// a non-empty base state produces a deterministic, non-empty root and that the
// root survives a commit+reopen cycle.
func TestBinaryHasherBasic(t *testing.T) {
	for _, cfg := range hasherTestConfigs {
		t.Run(cfg.name, func(t *testing.T) {
			h := makeBinaryBaseState(t, cfg)
			if cfg.prefetch {
				h.PrefetchStorage(hasherAddr1, []common.Hash{hasherSlot3}, false)
				h.PrefetchAccount([]common.Address{hasherAddr1, hasherAddr3}, false)
			}
			if err := h.UpdateStorage(hasherAddr1, []common.Hash{hasherSlot3}, []common.Hash{hasherVal3}); err != nil {
				t.Fatal(err)
			}
			if err := h.UpdateAccount(
				[]common.Address{hasherAddr1, hasherAddr3},
				[]AccountMut{hasherAccount(1, 100), hasherAccount(3, 300)},
			); err != nil {
				t.Fatal(err)
			}
			root := h.Hash()
			// The empty root of a binary trie is types.EmptyBinaryHash (the
			// value the base state is opened from), not the Merkle-Patricia
			// types.EmptyRootHash; comparing against the latter can never
			// fail and would make this check vacuous.
			if root == types.EmptyBinaryHash {
				t.Fatal("expected non-empty root after mutations")
			}
			h2 := commitAndReopenBinary(t, h, cfg)
			if h2.Hash() != root {
				t.Fatalf("root mismatch after reopen: got %x, want %x", h2.Hash(), root)
			}
		})
	}
}
// TestBinaryHasherPrefetchReadOnly verifies that read-only prefetching (for
// accounts and storage that are never subsequently mutated) does not corrupt
// state. Both prefetchRead=true (requests are processed) and prefetchRead=false
// (requests are dropped) are tested.
func TestBinaryHasherPrefetchReadOnly(t *testing.T) {
for _, prefetchRead := range []bool{false, true} {
name := "readDropped"
if prefetchRead {
name = "readProcessed"
}
t.Run(name, func(t *testing.T) {
cfg := hasherTestConfig{name, true, prefetchRead}
h := makeBinaryBaseState(t, cfg)
rootBefore := h.Hash()
// Prefetch addr1's account and storage (read-only).
h.PrefetchAccount([]common.Address{hasherAddr1, hasherAddr2}, true)
h.PrefetchStorage(hasherAddr1, []common.Hash{hasherSlot1, hasherSlot2}, true)
// Only mutate addr2 — addr1's prefetched data is never written.
if err := h.UpdateAccount(
[]common.Address{hasherAddr2},
[]AccountMut{hasherAccount(2, 300)},
); err != nil {
t.Fatal(err)
}
root := h.Hash()
if root == rootBefore {
t.Fatal("expected root to change after balance update")
}
h2 := commitAndReopenBinary(t, h, hasherTestConfig{"verify", false, false})
if h2.Hash() != root {
t.Fatalf("root mismatch: got %x, want %x", h2.Hash(), root)
}
})
}
}
// TestBinaryHasherPrefetchDeterminism applies the same mutations under every
// prefetch configuration and requires all resulting roots to be identical.
func TestBinaryHasherPrefetchDeterminism(t *testing.T) {
	roots := make([]common.Hash, 0, len(hasherTestConfigs))
	for _, cfg := range hasherTestConfigs {
		h := makeBinaryBaseState(t, cfg)
		if cfg.prefetch {
			h.PrefetchAccount([]common.Address{hasherAddr1, hasherAddr3}, false)
			h.PrefetchStorage(hasherAddr1, []common.Hash{hasherSlot3}, false)
			h.PrefetchStorage(hasherAddr3, []common.Hash{hasherSlot1}, false)
		}
		err := h.UpdateStorage(hasherAddr1, []common.Hash{hasherSlot3}, []common.Hash{hasherVal3})
		if err != nil {
			t.Fatal(err)
		}
		err = h.UpdateStorage(hasherAddr3, []common.Hash{hasherSlot1}, []common.Hash{hasherVal1})
		if err != nil {
			t.Fatal(err)
		}
		err = h.UpdateAccount(
			[]common.Address{hasherAddr1, hasherAddr3},
			[]AccountMut{hasherAccount(1, 100), hasherAccount(3, 300)},
		)
		if err != nil {
			t.Fatal(err)
		}
		roots = append(roots, h.Hash())
	}
	// Every root is compared against the one from the first configuration.
	for i, got := range roots {
		if i == 0 {
			continue
		}
		if got != roots[0] {
			t.Fatalf("root diverged: config[0]=%x config[%d]=%x", roots[0], i, got)
		}
	}
}
// TestBinaryHasherCopy verifies that Copy produces an independent snapshot:
// mutations on the copy must not affect the original's hash.
func TestBinaryHasherCopy(t *testing.T) {
cfg := hasherTestConfig{"prefetchAll", true, true}
h := makeBinaryBaseState(t, cfg)
h.PrefetchAccount([]common.Address{hasherAddr1}, false)
h.PrefetchStorage(hasherAddr1, []common.Hash{hasherSlot3}, false)
if err := h.UpdateStorage(hasherAddr1, []common.Hash{hasherSlot3}, []common.Hash{hasherVal3}); err != nil {
t.Fatal(err)
}
if err := h.UpdateAccount([]common.Address{hasherAddr1}, []AccountMut{hasherAccount(1, 100)}); err != nil {
t.Fatal(err)
}
origRoot := h.Hash()
cpy := h.Copy()
defer cpy.(*binaryHasher).TermPrefetch()
// Mutate the copy: delete slot3, add slot2 with new value.
if err := cpy.UpdateStorage(hasherAddr1, []common.Hash{hasherSlot3, hasherSlot2}, []common.Hash{{}, hasherVal3}); err != nil {
t.Fatal(err)
}
if err := cpy.UpdateAccount([]common.Address{hasherAddr1}, []AccountMut{hasherAccount(1, 100)}); err != nil {
t.Fatal(err)
}
if cpy.Hash() == origRoot {
t.Fatal("copy should diverge after mutation")
}
if h.Hash() != origRoot {
t.Fatal("original root changed after mutating copy")
}
}
// TestBinaryHasherWitness verifies that the witness returned by CollectWitness
// contains trie nodes for accessed accounts and storage. When read-only
// prefetching is enabled, the prefetched (but never written) data must also
// appear in the witness.
func TestBinaryHasherWitness(t *testing.T) {
// Collect witness WITHOUT read-prefetching: only mutated paths are tracked.
collectWitness := func(prefetchRead bool) int {
cfg := hasherTestConfig{"witness", true, prefetchRead}
h := makeBinaryBaseState(t, cfg)
// Read-only prefetch of addr1 account and slot1 (never mutated below).
h.PrefetchAccount([]common.Address{hasherAddr1}, true)
h.PrefetchStorage(hasherAddr1, []common.Hash{hasherSlot1}, true)
// Mutate only addr2 (no storage).
if err := h.UpdateAccount(
[]common.Address{hasherAddr2},
[]AccountMut{hasherAccount(2, 300)},
); err != nil {
t.Fatal(err)
}
h.Hash()
witness := &stateless.Witness{
Codes: make(map[string]struct{}),
State: make(map[string]struct{}),
}
h.CollectWitness(witness)
return len(witness.State)
}
nodesWithoutRead := collectWitness(false)
nodesWithRead := collectWitness(true)
if nodesWithoutRead == 0 {
t.Fatal("witness should contain trie nodes even without read prefetching")
}
if nodesWithRead <= nodesWithoutRead {
t.Fatalf("read-only prefetching should add extra nodes to witness: got %d (with read) vs %d (without)", nodesWithRead, nodesWithoutRead)
}
}
// TestBinaryHasherLeafProduction verifies that binaryHasher implements
// LeafProducer and reports stem writes corresponding to each trie
// mutation. Covers the three mutation kinds the hasher performs:
// account update, storage update, and account delete.
//
// The steps are order-dependent: each one drains the buffer, so every
// assertion only sees the writes produced by the mutation right before it.
func TestBinaryHasherLeafProduction(t *testing.T) {
db := triedb.NewDatabase(rawdb.NewMemoryDatabase(), triedb.VerkleDefaults)
h := newTestBinaryHasher(t, db, types.EmptyBinaryHash, hasherTestConfig{"leaf", false, false})
// Type assertion: binaryHasher must satisfy LeafProducer.
lp, ok := Hasher(h).(LeafProducer)
if !ok {
t.Fatal("binaryHasher should implement LeafProducer")
}
// --- Account update: expect two writes (BasicData + CodeHash) ---
if err := h.UpdateAccount(
[]common.Address{hasherAddr1},
[]AccountMut{hasherAccount(1, 100)},
); err != nil {
t.Fatalf("UpdateAccount: %v", err)
}
writes := lp.DrainStemWrites()
if len(writes) != 2 {
t.Fatalf("UpdateAccount: got %d stem writes, want 2 (BasicData + CodeHash)", len(writes))
}
// Offsets 0 and 1 respectively, and the BasicData stem matches the
// CodeHash stem (same address → same 31-byte stem).
if writes[0].Offset != bintrie.BasicDataLeafKey {
t.Errorf("write[0].Offset = %d, want %d (BasicDataLeafKey)", writes[0].Offset, bintrie.BasicDataLeafKey)
}
if writes[1].Offset != bintrie.CodeHashLeafKey {
t.Errorf("write[1].Offset = %d, want %d (CodeHashLeafKey)", writes[1].Offset, bintrie.CodeHashLeafKey)
}
if writes[0].Stem != writes[1].Stem {
t.Errorf("stems differ: %x vs %x", writes[0].Stem, writes[1].Stem)
}
// Both leaves carry full 32-byte values.
if len(writes[0].Value) != 32 {
t.Errorf("write[0].Value length = %d, want 32", len(writes[0].Value))
}
if len(writes[1].Value) != 32 {
t.Errorf("write[1].Value length = %d, want 32", len(writes[1].Value))
}
// The code hash leaf should be the empty-code hash (non-zero).
if !bytes.Equal(writes[1].Value, types.EmptyCodeHash.Bytes()) {
t.Errorf("write[1].Value = %x, want empty code hash %x", writes[1].Value, types.EmptyCodeHash.Bytes())
}
// --- Drain again: should be empty (drain is destructive) ---
if again := lp.DrainStemWrites(); len(again) != 0 {
t.Fatalf("second drain should be empty, got %d writes", len(again))
}
// --- Storage update: non-zero value produces one write ---
if err := h.UpdateStorage(hasherAddr1, []common.Hash{hasherSlot1}, []common.Hash{hasherVal1}); err != nil {
t.Fatalf("UpdateStorage: %v", err)
}
writes = lp.DrainStemWrites()
if len(writes) != 1 {
t.Fatalf("UpdateStorage: got %d writes, want 1", len(writes))
}
// The recorded value should match hasherVal1 (a common.Hash), which
// is already 32 bytes wide.
if !bytes.Equal(writes[0].Value, hasherVal1[:]) {
t.Errorf("UpdateStorage value: got %x, want %x", writes[0].Value, hasherVal1)
}
// --- Storage "delete" (zero value): one write with 32 zero bytes ---
// The write is a present-with-zero tombstone, not a nil "clear".
if err := h.UpdateStorage(hasherAddr1, []common.Hash{hasherSlot1}, []common.Hash{{}}); err != nil {
t.Fatalf("UpdateStorage (zero): %v", err)
}
writes = lp.DrainStemWrites()
if len(writes) != 1 {
t.Fatalf("UpdateStorage (zero): got %d writes, want 1", len(writes))
}
var zeros [32]byte
if !bytes.Equal(writes[0].Value, zeros[:]) {
t.Errorf("zero-value storage write should record 32 zero bytes, got %x", writes[0].Value)
}
// --- Account delete: two writes with nil values ---
// A nil Account in the mutation selects the delete path.
if err := h.UpdateAccount(
[]common.Address{hasherAddr1},
[]AccountMut{{Account: nil}},
); err != nil {
t.Fatalf("UpdateAccount delete: %v", err)
}
writes = lp.DrainStemWrites()
if len(writes) != 2 {
t.Fatalf("delete: got %d writes, want 2 (BasicData + CodeHash clear)", len(writes))
}
for i, w := range writes {
if w.Value != nil {
t.Errorf("delete write[%d] should have nil Value (clear), got %x", i, w.Value)
}
}
if writes[0].Offset != bintrie.BasicDataLeafKey || writes[1].Offset != bintrie.CodeHashLeafKey {
t.Errorf("delete offsets: got %d,%d, want %d,%d", writes[0].Offset, writes[1].Offset, bintrie.BasicDataLeafKey, bintrie.CodeHashLeafKey)
}
}
// TestMerkleHasherNoLeafProducer guards the opt-in nature of LeafProducer:
// merkleHasher must NOT satisfy the interface, since the MPT path has no
// concept of stem writes.
func TestMerkleHasherNoLeafProducer(t *testing.T) {
	memdb := triedb.NewDatabase(rawdb.NewMemoryDatabase(), nil)
	mh, err := newMerkleHasher(types.EmptyRootHash, memdb, false, false)
	if err != nil {
		t.Fatal(err)
	}
	_, isProducer := Hasher(mh).(LeafProducer)
	if isProducer {
		t.Fatal("merkleHasher should NOT implement LeafProducer")
	}
}
// TestBinaryHasherWritesBothBasicAndCodeHash is a load-bearing invariant
// test for the A1 remediation. The bintrieFlatReader.Account method
// performs TWO independent AccountRLP reads (BasicData at offset 0 and
// CodeHash at offset 1). Cross-read consistency is only safe if the
// hasher ALWAYS co-writes both leaves whenever it touches an account —
// if a future optimization (e.g., a code-only update) emitted only the
// CodeHash leaf, the two reads could resolve to different layers and
// return a torn view.
//
// This test locks the invariant down: after an UpdateAccount call, the
// drained stem writes must contain EXACTLY ONE BasicData write and
// EXACTLY ONE CodeHash write for the touched address, both at the same
// stem. Any change to binaryHasher.updateAccount that drops either
// write will fail this test and the developer will be forced to
// re-evaluate the bintrieFlatReader.Account torn-read argument before
// shipping.
//
// "Exactly one of each" follows from two checks combined: the write count
// must be 2, and both offsets must have been seen.
func TestBinaryHasherWritesBothBasicAndCodeHash(t *testing.T) {
db := triedb.NewDatabase(rawdb.NewMemoryDatabase(), triedb.VerkleDefaults)
h := newTestBinaryHasher(t, db, types.EmptyBinaryHash, hasherTestConfig{"inv", false, false})
lp, ok := Hasher(h).(LeafProducer)
if !ok {
t.Fatal("binaryHasher should implement LeafProducer")
}
// Update a single account. The hasher MUST emit exactly two stem
// writes: BasicData (offset 0) and CodeHash (offset 1), at the
// same stem.
if err := h.UpdateAccount(
[]common.Address{hasherAddr1},
[]AccountMut{hasherAccount(1, 100)},
); err != nil {
t.Fatalf("UpdateAccount: %v", err)
}
writes := lp.DrainStemWrites()
if len(writes) != 2 {
t.Fatalf("expected exactly 2 stem writes per UpdateAccount (BasicData + CodeHash), got %d", len(writes))
}
// Verify one is BasicData and one is CodeHash.
seenBasic := false
seenCode := false
for _, w := range writes {
switch w.Offset {
case bintrie.BasicDataLeafKey:
seenBasic = true
case bintrie.CodeHashLeafKey:
seenCode = true
default:
t.Errorf("unexpected stem write offset %d (want %d or %d)", w.Offset, bintrie.BasicDataLeafKey, bintrie.CodeHashLeafKey)
}
}
if !seenBasic {
t.Error("UpdateAccount did NOT emit a BasicData leaf write — bintrieFlatReader.Account torn-read invariant broken")
}
if !seenCode {
t.Error("UpdateAccount did NOT emit a CodeHash leaf write — bintrieFlatReader.Account torn-read invariant broken")
}
// Verify both writes target the same stem.
if writes[0].Stem != writes[1].Stem {
t.Errorf("BasicData and CodeHash writes at different stems: %x vs %x", writes[0].Stem, writes[1].Stem)
}
// Exercise the delete path too: binaryHasher.deleteAccount should
// also emit both nil writes. Only the count and the nil values are
// checked here; the offsets of the delete writes are not asserted.
if err := h.UpdateAccount(
[]common.Address{hasherAddr1},
[]AccountMut{{Account: nil}},
); err != nil {
t.Fatalf("UpdateAccount (delete): %v", err)
}
deleteWrites := lp.DrainStemWrites()
if len(deleteWrites) != 2 {
t.Fatalf("expected 2 stem writes per account delete, got %d", len(deleteWrites))
}
for i, w := range deleteWrites {
if w.Value != nil {
t.Errorf("delete write[%d] should have nil Value, got %x", i, w.Value)
}
}
}
// TestStateUpdateEncodeBinaryFromLeaves checks how stateUpdate.encodeBinary
// converts a list of StemWrite leaves into the per-offset accountData map
// consumed by pathdb's bintrie codec. The properties under test are:
//
//  1. Each leaf yields exactly one accountData entry keyed by stem||offset.
//  2. Leaves with a nil value (account/storage deletes) map to nil entries.
//  3. Non-nil values are copied rather than aliased, so the result never
//     points into the caller's (i.e. the hasher's) buffers.
//
// The storages, storageOrigin and accountOrigin results are expected to be
// empty: the bintrie path uses only accountData (per the layered-read design)
// and does not yet support state-history rollback.
func TestStateUpdateEncodeBinaryFromLeaves(t *testing.T) {
	// Two fully populated stems plus a sparse one used for the delete case.
	var stemA, stemB [bintrie.StemSize]byte
	for i := 0; i < len(stemA); i++ {
		stemA[i] = byte(0x10 + i)
		stemB[i] = byte(0xA0 + i)
	}
	deletedStem := [bintrie.StemSize]byte{0xFF, 0xFF}

	var (
		basicDataValue = bytes.Repeat([]byte{0xAA}, 32)
		codeHashValue  = bytes.Repeat([]byte{0xBB}, 32)
		storageValue   = bytes.Repeat([]byte{0xCC}, 32)
	)
	// One sample of every write kind the binary hasher emits: an account
	// update (BasicData + CodeHash), a storage write, and an account delete
	// (nil values at offsets BasicData and CodeHash).
	leaves := []StemWrite{
		{Stem: stemA, Offset: bintrie.BasicDataLeafKey, Value: basicDataValue},
		{Stem: stemA, Offset: bintrie.CodeHashLeafKey, Value: codeHashValue},
		{Stem: stemB, Offset: 7, Value: storageValue},
		{Stem: deletedStem, Offset: bintrie.BasicDataLeafKey, Value: nil},
		{Stem: deletedStem, Offset: bintrie.CodeHashLeafKey, Value: nil},
	}

	update := &stateUpdate{leaves: leaves}
	accounts, accountOrigin, storages, storageOrigin, err := update.encodeBinary()
	if err != nil {
		t.Fatalf("encodeBinary: %v", err)
	}
	if have, want := len(accounts), len(leaves); have != want {
		t.Fatalf("accounts len = %d, want %d", have, want)
	}
	if n := len(storages); n != 0 {
		t.Errorf("storages should be empty for bintrie, got %d entries", n)
	}
	if len(accountOrigin) != 0 || len(storageOrigin) != 0 {
		t.Errorf("origin maps should be empty for bintrie")
	}

	// Every leaf must be retrievable under its full 32-byte key.
	for idx, leaf := range leaves {
		var fullKey common.Hash
		copy(fullKey[:bintrie.StemSize], leaf.Stem[:])
		fullKey[bintrie.StemSize] = leaf.Offset

		got, present := accounts[fullKey]
		switch {
		case !present:
			t.Errorf("leaf %d: missing key %x", idx, fullKey)
		case leaf.Value == nil:
			if got != nil {
				t.Errorf("leaf %d: nil leaf became %x", idx, got)
			}
		default:
			if !bytes.Equal(got, leaf.Value) {
				t.Errorf("leaf %d: got %x, want %x", idx, got, leaf.Value)
			}
			// The encoder has to own its bytes: sharing the first
			// element's address with the input means it aliased.
			if len(got) > 0 && &got[0] == &leaf.Value[0] {
				t.Errorf("leaf %d: encodeBinary aliased the input slice", idx)
			}
		}
	}
}

View file

@ -0,0 +1,470 @@
// Copyright 2026 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package state
import (
"maps"
"sync"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/stateless"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/trie"
"github.com/ethereum/go-ethereum/trie/trienode"
"github.com/ethereum/go-ethereum/triedb"
"golang.org/x/sync/errgroup"
)
// wrapTrie pairs a StateTrie with an optional background prefetcher that
// preloads trie nodes ahead of mutation.
type wrapTrie struct {
// StateTrie is embedded, so its methods are promoted onto wrapTrie unless
// a wrapTrie method of the same name shadows them.
*trie.StateTrie
// prefetcher is nil when the trie was created without prefetching, and is
// reset to nil by term once the prefetcher has been terminated.
prefetcher *prefetcher
}
// newWrapTrie opens the merkle state trie identified by id from db and wraps
// it. A prefetcher is attached only when prefetch is set; prefetchRead is
// handed to the prefetcher constructor unchanged. Any error from opening the
// trie is returned as-is.
func newWrapTrie(id *trie.ID, db *triedb.Database, prefetch bool, prefetchRead bool) (*wrapTrie, error) {
	stateTrie, err := trie.NewStateTrie(id, db)
	if err != nil {
		return nil, err
	}
	wrapped := &wrapTrie{StateTrie: stateTrie}
	if prefetch {
		wrapped.prefetcher = newPrefetcher(stateTrie, prefetchRead)
	}
	return wrapped, nil
}
// term shuts the prefetcher down synchronously and then drops the reference
// to it. With no prefetcher attached (never created, or already terminated)
// the call reduces to a single nil check.
func (tr *wrapTrie) term() {
	if p := tr.prefetcher; p != nil {
		p.terminate()
		tr.prefetcher = nil
	}
}
// The methods below shadow the embedded trie.StateTrie so that any direct trie
// access auto-terminates the prefetcher first. This makes data-race freedom
// structural: callers never need to remember to call term() manually.

// UpdateAccount terminates the prefetcher, then writes acc for address through
// the embedded StateTrie. The embedded method's third argument is always
// passed as 0.
func (tr *wrapTrie) UpdateAccount(address common.Address, acc *types.StateAccount) error {
tr.term()
return tr.StateTrie.UpdateAccount(address, acc, 0)
}
// DeleteAccount terminates the prefetcher, then forwards the deletion of
// address to the embedded StateTrie.
func (tr *wrapTrie) DeleteAccount(address common.Address) error {
tr.term()
return tr.StateTrie.DeleteAccount(address)
}
// UpdateStorage terminates the prefetcher, then forwards the storage write
// (address, key, value) to the embedded StateTrie unchanged.
func (tr *wrapTrie) UpdateStorage(address common.Address, key, value []byte) error {
tr.term()
return tr.StateTrie.UpdateStorage(address, key, value)
}
// DeleteStorage terminates the prefetcher, then forwards the storage deletion
// (address, key) to the embedded StateTrie unchanged.
func (tr *wrapTrie) DeleteStorage(address common.Address, key []byte) error {
tr.term()
return tr.StateTrie.DeleteStorage(address, key)
}
// Hash terminates the prefetcher, then returns the embedded StateTrie's hash.
func (tr *wrapTrie) Hash() common.Hash {
tr.term()
return tr.StateTrie.Hash()
}
// Commit terminates the prefetcher, then commits the embedded StateTrie,
// passing collectLeaf through and returning its results unchanged.
func (tr *wrapTrie) Commit(collectLeaf bool) (common.Hash, *trienode.NodeSet) {
tr.term()
return tr.StateTrie.Commit(collectLeaf)
}
// Prove terminates the prefetcher, then asks the embedded StateTrie to prove
// key into proofDb. The key is forwarded exactly as supplied by the caller.
func (tr *wrapTrie) Prove(key []byte, proofDb ethdb.KeyValueWriter) error {
tr.term()
return tr.StateTrie.Prove(key, proofDb)
}
// Witness terminates the prefetcher, then returns the embedded StateTrie's
// witness map.
func (tr *wrapTrie) Witness() map[string][]byte {
tr.term()
return tr.StateTrie.Witness()
}
// prefetchAccounts schedules the given accounts on the prefetcher so the trie
// is prewarmed for them. It does nothing when no prefetcher is attached. The
// read flag is forwarded to the prefetcher unchanged.
func (tr *wrapTrie) prefetchAccounts(addresses []common.Address, read bool) {
	if p := tr.prefetcher; p != nil {
		p.scheduleAccounts(addresses, read)
	}
}
// prefetchStorage schedules the given storage slots of addr on the prefetcher
// so the trie is prewarmed for them. It does nothing when no prefetcher is
// attached. The read flag is forwarded to the prefetcher unchanged.
func (tr *wrapTrie) prefetchStorage(addr common.Address, keys []common.Hash, read bool) {
	if p := tr.prefetcher; p != nil {
		p.scheduleSlots(addr, keys, read)
	}
}
// copy returns a new wrapTrie around a copy of the underlying state trie. The
// receiver's prefetcher is terminated first, and the returned wrapper has no
// prefetcher of its own: a prefetcher belongs solely to the trie it was
// created for.
func (tr *wrapTrie) copy() *wrapTrie {
	tr.term()

	cpy := new(wrapTrie)
	cpy.StateTrie = tr.StateTrie.Copy()
	return cpy
}
// storageRootReader wraps the account trie for loading the storage root. It is
// essential to use an independent trie to prevent potential data races with
// the optional prefetcher.
//
// TODO(rjl493456442) use the flat state for better read efficiency.
type storageRootReader struct {
// tr is a separately opened account trie (see newStorageRootReader); it is
// only used for account lookups in read.
tr *trie.StateTrie
}
// newStorageRootReader opens its own account trie at the given state root from
// db and wraps it in a storageRootReader. Any error from opening the trie is
// returned unchanged.
func newStorageRootReader(root common.Hash, db *triedb.Database) (*storageRootReader, error) {
	id := trie.StateTrieID(root)
	accountTrie, err := trie.NewStateTrie(id, db)
	if err != nil {
		return nil, err
	}
	reader := &storageRootReader{tr: accountTrie}
	return reader, nil
}
// read looks up address in the account trie and returns the storage root
// recorded for it. An account that does not exist yields types.EmptyRootHash;
// a lookup failure yields the zero hash together with the error.
func (r *storageRootReader) read(address common.Address) (common.Hash, error) {
	account, err := r.tr.GetAccount(address)
	switch {
	case err != nil:
		return common.Hash{}, err
	case account == nil:
		return types.EmptyRootHash, nil
	default:
		return account.Root, nil
	}
}
// copy returns a new reader backed by a copy of the underlying account trie.
func (r *storageRootReader) copy() *storageRootReader {
return &storageRootReader{tr: r.tr.Copy()}
}
// merkleHasher is a Hasher implementation backed by the traditional two-layer
// Merkle Patricia Trie (separate account trie and per-account storage tries).
type merkleHasher struct {
// db is the trie database the account and storage tries are opened from.
db *triedb.Database
// root is the state root the hasher was opened at.
root common.Hash
// reader is an independently opened account trie used to look up the
// storage root of an account on first touch.
reader *storageRootReader
// prefetch enables the Prefetch* methods and allows tries to be created
// with a prefetcher attached.
prefetch bool
// prefetchRead is forwarded to newWrapTrie; when false, read-only storage
// prefetch requests are dropped by PrefetchStorage.
prefetchRead bool
// acctTrie is the account trie.
acctTrie *wrapTrie
// storageTries caches the storage tries opened so far, keyed by account
// address. Entries are removed when the account is deleted.
storageTries map[common.Address]*wrapTrie
// deletedTries preserves storage tries of accounts that were deleted
// during the block keyed by address. Only the first deletion per
// address is recorded (the pre-block incarnation).
deletedTries map[common.Address]*wrapTrie
// storageRoots tracks the storage root transition for each resolved
// account. Prev is captured on first touch; Hash is updated by
// UpdateStorage or set to EmptyRootHash on deletion.
storageRoots map[common.Address]Hashes
// storageLock is held by openStorageTrie and UpdateStorage around their
// accesses to storageTries and storageRoots. deleteAccount, updateAccount
// and storageRoot touch the same maps without acquiring it themselves.
storageLock sync.Mutex
}
// newMerkleHasher creates a merkleHasher rooted at the given state root.
//
// It opens the account trie (with a prefetcher attached when prefetch is set)
// and a second, independent account trie used only for reading storage roots.
// The prefetch and prefetchRead flags are remembered and applied to storage
// tries opened later.
//
// An error is returned if either trie cannot be opened. If the account trie
// was opened but the storage root reader was not, the account trie's
// prefetcher is terminated before returning so it is not abandoned.
func newMerkleHasher(root common.Hash, db *triedb.Database, prefetch bool, prefetchRead bool) (*merkleHasher, error) {
	tr, err := newWrapTrie(trie.StateTrieID(root), db, prefetch, prefetchRead)
	if err != nil {
		return nil, err
	}
	r, err := newStorageRootReader(root, db)
	if err != nil {
		// The account trie may already own a prefetcher. Nobody else holds
		// a reference to it on this path, so shut it down here.
		tr.term()
		return nil, err
	}
	return &merkleHasher{
		db:           db,
		root:         root,
		prefetch:     prefetch,
		prefetchRead: prefetchRead,
		reader:       r,
		acctTrie:     tr,
		storageTries: make(map[common.Address]*wrapTrie),
		deletedTries: make(map[common.Address]*wrapTrie),
		storageRoots: make(map[common.Address]Hashes),
	}, nil
}
// storageRoot reports the storage root currently tracked for addr. The first
// time an address is seen, its root is loaded through the storage root reader
// and stored as both Prev and Hash, so that Prev keeps the original value for
// the transition report returned by Commit.
//
// The method does not take storageLock itself; openStorageTrie calls it with
// the lock already held.
func (h *merkleHasher) storageRoot(addr common.Address) (common.Hash, error) {
	tracked, known := h.storageRoots[addr]
	if known {
		return tracked.Hash, nil
	}
	origin, err := h.reader.read(addr)
	if err != nil {
		return common.Hash{}, err
	}
	h.storageRoots[addr] = Hashes{Prev: origin, Hash: origin}
	return origin, nil
}
// openStorageTrie hands out the storage trie of the given account, opening it
// at the currently tracked storage root and caching it on first use. The
// whole lookup runs under storageLock.
//
// A newly opened trie only gets a prefetcher when both the hasher-wide
// prefetch flag and the prefetch argument are set; an already cached trie is
// returned as it is regardless of the argument.
func (h *merkleHasher) openStorageTrie(address common.Address, prefetch bool) (*wrapTrie, error) {
	h.storageLock.Lock()
	defer h.storageLock.Unlock()

	if cached, ok := h.storageTries[address]; ok {
		return cached, nil
	}
	stRoot, err := h.storageRoot(address)
	if err != nil {
		return nil, err
	}
	var (
		addrHash = crypto.Keccak256Hash(address.Bytes())
		trieID   = trie.StorageTrieID(h.root, addrHash, stRoot)
	)
	opened, err := newWrapTrie(trieID, h.db, h.prefetch && prefetch, h.prefetchRead)
	if err != nil {
		return nil, err
	}
	h.storageTries[address] = opened
	return opened, nil
}
// deleteAccount drops the account at addr from the account trie and marks its
// tracked storage root as empty, keeping the originally recorded Prev value.
func (h *merkleHasher) deleteAccount(addr common.Address) error {
	// Resolving the storage root first guarantees that the pre-deletion
	// root has been recorded as Prev.
	if _, err := h.storageRoot(addr); err != nil {
		return err
	}
	original := h.storageRoots[addr].Prev
	h.storageRoots[addr] = Hashes{Prev: original, Hash: types.EmptyRootHash}

	// Keep the storage trie of the first deletion of this address around
	// for witness collection; later deletions leave that entry untouched.
	if tr, open := h.storageTries[addr]; open {
		if h.deletedTries[addr] == nil {
			h.deletedTries[addr] = tr
		}
	}
	delete(h.storageTries, addr)

	return h.acctTrie.DeleteAccount(addr)
}
// updateAccount writes the account at addr into the account trie. Nonce,
// Balance and CodeHash come from the mutation, while Root is the storage root
// currently tracked for the address. The mutation's Account must be non-nil;
// UpdateAccount routes nil accounts to deleteAccount instead.
func (h *merkleHasher) updateAccount(addr common.Address, account AccountMut) error {
	stRoot, err := h.storageRoot(addr)
	if err != nil {
		return err
	}
	src := account.Account
	return h.acctTrie.UpdateAccount(addr, &types.StateAccount{
		Nonce:    src.Nonce,
		Balance:  src.Balance,
		Root:     stRoot,
		CodeHash: src.CodeHash,
	})
}
// UpdateAccount implements Hasher. It applies the account mutations in order:
// accounts[i] belongs to addresses[i], and a mutation whose Account is nil
// deletes the account. Processing stops at the first error, which is returned.
//
// All storage changes of the affected accounts are assumed to have been
// written already.
func (h *merkleHasher) UpdateAccount(addresses []common.Address, accounts []AccountMut) error {
	for i, addr := range addresses {
		mut := accounts[i]
		if mut.Account == nil {
			if err := h.deleteAccount(addr); err != nil {
				return err
			}
			continue
		}
		if err := h.updateAccount(addr, mut); err != nil {
			return err
		}
	}
	return nil
}
// UpdateStorage implements Hasher. It applies the slot mutations of a single
// account to its storage trie: values[i] belongs to keys[i], a zero value
// deletes the slot, and any other value is stored with its leading zero bytes
// trimmed. Processing stops at the first error, which is returned.
//
// This must be called before the account's own metadata is written, because
// the resulting storage root is recorded here and picked up by that write.
func (h *merkleHasher) UpdateStorage(address common.Address, keys []common.Hash, values []common.Hash) error {
	tr, err := h.openStorageTrie(address, false)
	if err != nil {
		return err
	}
	for i, slot := range keys {
		var opErr error
		if values[i] != (common.Hash{}) {
			opErr = tr.UpdateStorage(address, slot[:], common.TrimLeftZeroes(values[i][:]))
		} else {
			opErr = tr.DeleteStorage(address, slot[:])
		}
		if opErr != nil {
			return opErr
		}
	}
	// The hash is computed without holding the lock so that different
	// accounts can be processed fully in parallel.
	newRoot := tr.Hash()

	// Record the fresh storage root, preserving the original Prev.
	h.storageLock.Lock()
	prev := h.storageRoots[address].Prev
	h.storageRoots[address] = Hashes{Prev: prev, Hash: newRoot}
	h.storageLock.Unlock()

	return nil
}
// Hash implements Hasher, computing the state root hash without committing.
// The value is the hash of the account trie; calling it also terminates the
// account trie's prefetcher (see wrapTrie.Hash).
func (h *merkleHasher) Hash() common.Hash {
return h.acctTrie.Hash()
}
// Commit implements Hasher. It commits the account trie and every open storage
// trie concurrently, one goroutine per trie, and merges the dirty node sets
// they produce into a single MergedNodeSet.
//
// On success it returns the new state root, the merged node set and the
// hasher's storage root transition map (the internal map itself, not a copy).
// If merging any node set fails, the zero hash, nil results and that error are
// returned.
func (h *merkleHasher) Commit() (common.Hash, *trienode.MergedNodeSet, map[common.Address]Hashes, error) {
	var (
		workers   errgroup.Group
		mu        sync.Mutex
		stateRoot common.Hash
		merged    = trienode.NewMergedNodeSet()
	)
	// collect folds one trie's node set into the shared result. A nil set
	// (nothing to merge for that trie) is skipped.
	collect := func(set *trienode.NodeSet) error {
		if set == nil {
			return nil
		}
		mu.Lock()
		defer mu.Unlock()
		return merged.Merge(set)
	}
	// The account trie is committed with leaf collection enabled.
	workers.Go(func() error {
		var set *trienode.NodeSet
		stateRoot, set = h.acctTrie.Commit(true)
		return collect(set)
	})
	// Storage tries are committed without leaf collection.
	for _, tr := range h.storageTries {
		workers.Go(func() error {
			_, set := tr.Commit(false)
			return collect(set)
		})
	}
	if err := workers.Wait(); err != nil {
		return common.Hash{}, nil, nil, err
	}
	return stateRoot, merged, h.storageRoots, nil
}
// Copy implements Hasher. The returned hasher shares the database handle and
// root with the receiver but owns copies of the storage root reader, the
// account trie, every active and deleted storage trie, and the storage root
// map. Prefetching is switched off on the copy. Copying a trie terminates the
// prefetcher of the original (see wrapTrie.copy).
func (h *merkleHasher) Copy() Hasher {
	cloneTries := func(src map[common.Address]*wrapTrie) map[common.Address]*wrapTrie {
		dst := make(map[common.Address]*wrapTrie, len(src))
		for addr, tr := range src {
			dst[addr] = tr.copy()
		}
		return dst
	}
	return &merkleHasher{
		db:           h.db,
		root:         h.root,
		reader:       h.reader.copy(),
		prefetch:     false,
		prefetchRead: false,
		acctTrie:     h.acctTrie.copy(),
		storageTries: cloneTries(h.storageTries),
		deletedTries: cloneTries(h.deletedTries),
		storageRoots: maps.Clone(h.storageRoots),
	}
}
// ProveAccount implements Prover, constructing a proof for the given account.
// The account trie is asked to prove the Keccak256 hash of the address bytes,
// and the proof is written into proofDb.
func (h *merkleHasher) ProveAccount(addr common.Address, proofDb ethdb.KeyValueWriter) error {
return h.acctTrie.Prove(crypto.Keccak256(addr.Bytes()), proofDb)
}
// ProveStorage implements Prover. It builds a proof for one storage slot of
// the given account: the account's storage trie is opened (or taken from the
// cache, without requesting a prefetcher) and asked to prove the Keccak256
// hash of the slot key into proofDb. A failure to open the trie is returned.
func (h *merkleHasher) ProveStorage(addr common.Address, key common.Hash, proofDb ethdb.KeyValueWriter) error {
	storageTrie, err := h.openStorageTrie(addr, false)
	if err != nil {
		return err
	}
	hashedKey := crypto.Keccak256(key.Bytes())
	return storageTrie.Prove(hashedKey, proofDb)
}
// CollectWitness implements WitnessCollector. It feeds the witness of the
// account trie, of every active storage trie and of every preserved deleted
// storage trie into the given state witness. The account trie is added with
// the zero hash, each storage trie with the Keccak256 hash of its account
// address.
func (h *merkleHasher) CollectWitness(witness *stateless.Witness) {
	witness.AddState(h.acctTrie.Witness(), common.Hash{})

	groups := []map[common.Address]*wrapTrie{h.storageTries, h.deletedTries}
	for _, tries := range groups {
		for addr, tr := range tries {
			nodes := tr.Witness()
			witness.AddState(nodes, crypto.Keccak256Hash(addr.Bytes()))
		}
	}
}
// PrefetchAccount implements Prefetcher. When prefetching is enabled on the
// hasher it schedules the given accounts on the account trie's prefetcher;
// otherwise it does nothing. The read flag is forwarded unchanged.
func (h *merkleHasher) PrefetchAccount(addresses []common.Address, read bool) {
	if h.prefetch {
		h.acctTrie.prefetchAccounts(addresses, read)
	}
}
// PrefetchStorage implements Prefetcher. The account's storage trie is opened
// right away (with a prefetcher requested) so node loading can start in the
// background, and the given slots are scheduled on it.
//
// The request is ignored when prefetching is disabled, or when it is a read
// request and read prefetching is disabled. Prefetching is best effort: if
// the storage trie cannot be opened the request is silently dropped.
func (h *merkleHasher) PrefetchStorage(addr common.Address, keys []common.Hash, read bool) {
	if !h.prefetch || (read && !h.prefetchRead) {
		return
	}
	if tr, err := h.openStorageTrie(addr, true); err == nil {
		tr.prefetchStorage(addr, keys, read)
	}
}
// TermPrefetch terminates the prefetchers of the account trie, of all active
// storage tries and of all preserved deleted storage tries. It is a no-op on
// a nil receiver and may be called repeatedly, since terminating a trie whose
// prefetcher is already gone does nothing.
func (h *merkleHasher) TermPrefetch() {
	if h == nil {
		return
	}
	h.acctTrie.term()

	groups := []map[common.Address]*wrapTrie{h.storageTries, h.deletedTries}
	for _, tries := range groups {
		for _, tr := range tries {
			tr.term()
		}
	}
}

View file

@ -0,0 +1,629 @@
// Copyright 2026 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package state
import (
"testing"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/stateless"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/triedb"
"github.com/holiman/uint256"
)
// Shared fixtures for the hasher tests: three account addresses, three storage
// slot keys and three storage values.
var (
hasherAddr1 = common.HexToAddress("0x1111111111111111111111111111111111111111")
hasherAddr2 = common.HexToAddress("0x2222222222222222222222222222222222222222")
hasherAddr3 = common.HexToAddress("0x3333333333333333333333333333333333333333")
hasherSlot1 = common.HexToHash("0x01")
hasherSlot2 = common.HexToHash("0x02")
hasherSlot3 = common.HexToHash("0x03")
hasherVal1 = common.HexToHash("0xaa")
hasherVal2 = common.HexToHash("0xbb")
hasherVal3 = common.HexToHash("0xcc")
)
// hasherTestConfig captures the prefetch flags varied across subtests.
type hasherTestConfig struct {
// name labels the configuration; tests use it as the subtest name.
name string
// prefetch is passed to newMerkleHasher as its prefetch flag.
prefetch bool
// prefetchRead is passed to newMerkleHasher as its prefetchRead flag.
prefetchRead bool
}
// hasherTestConfigs lists the (prefetch, prefetchRead) combinations the tests
// iterate over:
//   - prefetching disabled entirely
//   - prefetching for writes only (read prefetch requests are dropped)
//   - prefetching for both reads and writes
var hasherTestConfigs = []hasherTestConfig{
	{name: "noPrefetch", prefetch: false, prefetchRead: false},
	{name: "prefetchWriteOnly", prefetch: true, prefetchRead: false},
	{name: "prefetchAll", prefetch: true, prefetchRead: true},
}
// hasherAccount builds an account mutation with the given nonce and balance
// and the empty code hash.
func hasherAccount(nonce uint64, balance uint64) AccountMut {
	acct := &Account{
		Nonce:    nonce,
		Balance:  uint256.NewInt(balance),
		CodeHash: types.EmptyCodeHash.Bytes(),
	}
	return AccountMut{Account: acct}
}
// hasherDeleteAccount returns an AccountMut whose Account is nil, which
// merkleHasher.UpdateAccount treats as a deletion of the account.
func hasherDeleteAccount() AccountMut {
return AccountMut{Account: nil}
}
// newTestHasher opens a merkleHasher at root on top of db using the prefetch
// flags from cfg. Construction failure aborts the test. The hasher's
// prefetchers are terminated automatically when the test finishes.
func newTestHasher(t *testing.T, db *triedb.Database, root common.Hash, cfg hasherTestConfig) *merkleHasher {
	t.Helper()

	hasher, err := newMerkleHasher(root, db, cfg.prefetch, cfg.prefetchRead)
	if err != nil {
		t.Fatal(err)
	}
	t.Cleanup(hasher.TermPrefetch)
	return hasher
}
// commitAndReopen commits h, pushes the resulting node set (if there is one)
// into h's trie database and flushes it, then returns a brand-new hasher
// opened at the committed root with the prefetch flags from cfg. This
// simulates a block boundary. Any failure aborts the test, and the new
// hasher's prefetchers are terminated when the test finishes.
func commitAndReopen(t *testing.T, h *merkleHasher, cfg hasherTestConfig) *merkleHasher {
	t.Helper()

	newRoot, nodes, _, err := h.Commit()
	if err != nil {
		t.Fatal(err)
	}
	if nodes != nil {
		err = h.db.Update(newRoot, h.root, 0, nodes, nil)
		if err != nil {
			t.Fatal(err)
		}
		err = h.db.Commit(newRoot, false)
		if err != nil {
			t.Fatal(err)
		}
	}
	reopened, err := newMerkleHasher(newRoot, h.db, cfg.prefetch, cfg.prefetchRead)
	if err != nil {
		t.Fatal(err)
	}
	t.Cleanup(reopened.TermPrefetch)
	return reopened
}
// makeBaseState builds the non-empty starting state shared by the tests:
//   - addr1: nonce=1, balance=100, storage={slot1: val1, slot2: val2}
//   - addr2: nonce=2, balance=200, no storage
//
// The state is written with prefetching disabled, committed and flushed. The
// hasher that is returned is reopened from the committed root using cfg, so
// tests exercise the storageRootReader and the code paths for tries that
// already exist in the database.
func makeBaseState(t *testing.T, cfg hasherTestConfig) *merkleHasher {
	t.Helper()

	var (
		baseCfg = hasherTestConfig{"base", false, false}
		diskdb  = triedb.NewDatabase(rawdb.NewMemoryDatabase(), nil)
		base    = newTestHasher(t, diskdb, types.EmptyRootHash, baseCfg)
	)
	// Storage goes in first so the account writes pick up the new root.
	err := base.UpdateStorage(hasherAddr1, []common.Hash{hasherSlot1, hasherSlot2}, []common.Hash{hasherVal1, hasherVal2})
	if err != nil {
		t.Fatal(err)
	}
	err = base.UpdateAccount(
		[]common.Address{hasherAddr1, hasherAddr2},
		[]AccountMut{hasherAccount(1, 100), hasherAccount(2, 200)},
	)
	if err != nil {
		t.Fatal(err)
	}
	return commitAndReopen(t, base, cfg)
}
// TestMerkleHasherBasic applies a storage and an account mutation on top of
// the non-empty base state, for every prefetch configuration, and checks that
// the resulting root is not the empty root and is reproduced by a hasher
// reopened after commit.
func TestMerkleHasherBasic(t *testing.T) {
	for _, cfg := range hasherTestConfigs {
		t.Run(cfg.name, func(t *testing.T) {
			hasher := makeBaseState(t, cfg)

			// Warm the tries for the entries about to be written.
			if cfg.prefetch {
				hasher.PrefetchStorage(hasherAddr1, []common.Hash{hasherSlot3}, false)
				hasher.PrefetchAccount([]common.Address{hasherAddr1, hasherAddr3}, false)
			}
			// addr1 gains slot3; addr3 is created.
			err := hasher.UpdateStorage(hasherAddr1, []common.Hash{hasherSlot3}, []common.Hash{hasherVal3})
			if err != nil {
				t.Fatal(err)
			}
			err = hasher.UpdateAccount(
				[]common.Address{hasherAddr1, hasherAddr3},
				[]AccountMut{hasherAccount(1, 100), hasherAccount(3, 300)},
			)
			if err != nil {
				t.Fatal(err)
			}
			want := hasher.Hash()
			if want == types.EmptyRootHash {
				t.Fatal("expected non-empty root after mutations")
			}
			reopened := commitAndReopen(t, hasher, cfg)
			if got := reopened.Hash(); got != want {
				t.Fatalf("root mismatch after reopen: got %x, want %x", got, want)
			}
		})
	}
}
// TestMerkleHasherPrefetchReadOnly issues read-only prefetch requests for
// accounts and storage that are never mutated afterwards and checks that the
// state is not corrupted by them: the root changes with the one real mutation
// and is reproduced after commit+reopen. It runs with prefetchRead=false
// (requests are dropped) and prefetchRead=true (requests are processed).
//
// Prefetchers are shut down by the t.Cleanup hooks installed when the hashers
// are created; the test itself makes no explicit goroutine-leak assertion.
func TestMerkleHasherPrefetchReadOnly(t *testing.T) {
	modes := []struct {
		name         string
		prefetchRead bool
	}{
		{"readDropped", false},
		{"readProcessed", true},
	}
	for _, mode := range modes {
		t.Run(mode.name, func(t *testing.T) {
			cfg := hasherTestConfig{mode.name, true, mode.prefetchRead}
			hasher := makeBaseState(t, cfg)
			before := hasher.Hash()

			// Read-only prefetch of addr1's account and storage. Whether
			// anything is actually loaded depends on prefetchRead.
			hasher.PrefetchAccount([]common.Address{hasherAddr1, hasherAddr2}, true)
			hasher.PrefetchStorage(hasherAddr1, []common.Hash{hasherSlot1, hasherSlot2}, true)

			// The only mutation touches addr2, which has no storage, so
			// addr1's storage trie is never reached through a shadow method.
			err := hasher.UpdateAccount(
				[]common.Address{hasherAddr2},
				[]AccountMut{hasherAccount(2, 300)},
			)
			if err != nil {
				t.Fatal(err)
			}
			after := hasher.Hash()
			if after == before {
				t.Fatal("expected root to change after balance update")
			}
			verifier := commitAndReopen(t, hasher, hasherTestConfig{"verify", false, false})
			if got := verifier.Hash(); got != after {
				t.Fatalf("root mismatch: got %x, want %x", got, after)
			}
		})
	}
}
// TestMerkleHasherDeleteAccount deletes an account that owns storage and
// checks the storage root transition reported by Commit: Hash must be the
// empty root, while Prev must still hold the original non-empty root.
func TestMerkleHasherDeleteAccount(t *testing.T) {
	for _, cfg := range hasherTestConfigs {
		t.Run(cfg.name, func(t *testing.T) {
			hasher := makeBaseState(t, cfg)
			if cfg.prefetch {
				hasher.PrefetchAccount([]common.Address{hasherAddr1}, false)
				hasher.PrefetchStorage(hasherAddr1, []common.Hash{hasherSlot1, hasherSlot2}, false)
			}
			// addr1 owns slots 1 and 2 in the base state.
			err := hasher.UpdateAccount(
				[]common.Address{hasherAddr1},
				[]AccountMut{hasherDeleteAccount()},
			)
			if err != nil {
				t.Fatal(err)
			}
			_, _, transitions, err := hasher.Commit()
			if err != nil {
				t.Fatal(err)
			}
			tracked, found := transitions[hasherAddr1]
			switch {
			case !found:
				t.Fatal("deleted account missing from storageRoots")
			case tracked.Hash != types.EmptyRootHash:
				t.Fatalf("deleted account storage root: got %x, want EmptyRootHash", tracked.Hash)
			case tracked.Prev == types.EmptyRootHash:
				t.Fatal("deleted account Prev should be non-empty (had storage)")
			}
		})
	}
}
// TestMerkleHasherDeleteRecreate deletes an account and recreates it with
// different storage within the same block. The resulting root must be
// non-empty and be reproduced by a hasher reopened after commit, and the
// tracked storage root transition must keep the original Prev while Hash
// reflects the new storage.
func TestMerkleHasherDeleteRecreate(t *testing.T) {
	for _, cfg := range hasherTestConfigs {
		t.Run(cfg.name, func(t *testing.T) {
			hasher := makeBaseState(t, cfg)
			if cfg.prefetch {
				hasher.PrefetchAccount([]common.Address{hasherAddr1}, false)
				hasher.PrefetchStorage(hasherAddr1, []common.Hash{hasherSlot1, hasherSlot2}, false)
			}
			target := []common.Address{hasherAddr1}

			// Step 1: delete addr1.
			err := hasher.UpdateAccount(target, []AccountMut{hasherDeleteAccount()})
			if err != nil {
				t.Fatal(err)
			}
			// Step 2: recreate it with slot3 as its only storage entry.
			err = hasher.UpdateStorage(hasherAddr1, []common.Hash{hasherSlot3}, []common.Hash{hasherVal3})
			if err != nil {
				t.Fatal(err)
			}
			err = hasher.UpdateAccount(target, []AccountMut{hasherAccount(10, 500)})
			if err != nil {
				t.Fatal(err)
			}
			want := hasher.Hash()
			if want == types.EmptyRootHash {
				t.Fatal("expected non-empty root after recreate")
			}
			reopened := commitAndReopen(t, hasher, hasherTestConfig{"verify", false, false})

			// The transition is read from the original hasher's tracking map.
			tracked := hasher.storageRoots[hasherAddr1]
			switch {
			case tracked.Hash == types.EmptyRootHash:
				t.Fatal("recreated account should have non-empty storage root")
			case tracked.Prev == types.EmptyRootHash:
				t.Fatal("Prev should reflect the pre-deletion storage root")
			case tracked.Hash == tracked.Prev:
				t.Fatal("Hash and Prev should differ after delete+recreate with different slots")
			}
			if got := reopened.Hash(); got != want {
				t.Fatalf("root mismatch after reopen: got %x, want %x", got, want)
			}
		})
	}
}
// TestMerkleHasherPrefetchDeterminism applies the same set of mutations under
// every prefetch configuration and checks that all of them arrive at the same
// root.
func TestMerkleHasherPrefetchDeterminism(t *testing.T) {
	results := make([]common.Hash, 0, len(hasherTestConfigs))
	for _, cfg := range hasherTestConfigs {
		hasher := makeBaseState(t, cfg)
		if cfg.prefetch {
			hasher.PrefetchAccount([]common.Address{hasherAddr1, hasherAddr3}, false)
			hasher.PrefetchStorage(hasherAddr1, []common.Hash{hasherSlot3}, false)
			hasher.PrefetchStorage(hasherAddr3, []common.Hash{hasherSlot1}, false)
		}
		// addr1 gains slot3; addr3 is created with slot1.
		err := hasher.UpdateStorage(hasherAddr1, []common.Hash{hasherSlot3}, []common.Hash{hasherVal3})
		if err != nil {
			t.Fatal(err)
		}
		err = hasher.UpdateStorage(hasherAddr3, []common.Hash{hasherSlot1}, []common.Hash{hasherVal1})
		if err != nil {
			t.Fatal(err)
		}
		err = hasher.UpdateAccount(
			[]common.Address{hasherAddr1, hasherAddr3},
			[]AccountMut{hasherAccount(1, 100), hasherAccount(3, 300)},
		)
		if err != nil {
			t.Fatal(err)
		}
		results = append(results, hasher.Hash())
	}
	// Every configuration is compared against the first one.
	for i := range results {
		if i == 0 {
			continue
		}
		if results[i] != results[0] {
			t.Fatalf("root diverged: config[0]=%x config[%d]=%x", results[0], i, results[i])
		}
	}
}
// TestMerkleHasherCommitStorageRoots exhaustively checks the Prev/Hash pairs
// returned by Commit for every interesting mutation pattern:
//
// (1) delete account with non-empty storage
// (2) delete account with empty storage
// (3) delete + recreate with new non-empty storage
// (4) delete + recreate without storage (empty→empty after recreate)
// (5) delete + recreate: originally empty storage, recreated with storage
// (6) mutate account only, no storage (empty storage throughout)
// (7) mutate account only, non-empty storage unchanged
// (8) mutate account with modified storage
func TestMerkleHasherCommitStorageRoots(t *testing.T) {
var (
// Addresses for each case — distinct so they don't interfere.
addrDeleteNonEmpty = common.HexToAddress("0xaa01") // (1)
addrDeleteEmpty = common.HexToAddress("0xaa02") // (2)
addrRecreateStorage = common.HexToAddress("0xaa03") // (3)
addrRecreateNoStore = common.HexToAddress("0xaa04") // (4)
addrRecreateFromNone = common.HexToAddress("0xaa05") // (5)
addrMutateNoStorage = common.HexToAddress("0xaa06") // (6)
addrMutateKeepStore = common.HexToAddress("0xaa07") // (7)
addrMutateModStore = common.HexToAddress("0xaa08") // (8)
)
for _, cfg := range hasherTestConfigs {
t.Run(cfg.name, func(t *testing.T) {
// ---------- base state (committed to disk) ----------
noPrefetch := hasherTestConfig{"base", false, false}
db := triedb.NewDatabase(rawdb.NewMemoryDatabase(), nil)
base := newTestHasher(t, db, types.EmptyRootHash, noPrefetch)
// Accounts with storage.
for _, addr := range []common.Address{addrDeleteNonEmpty, addrRecreateStorage, addrRecreateNoStore, addrMutateKeepStore, addrMutateModStore} {
if err := base.UpdateStorage(addr, []common.Hash{hasherSlot1}, []common.Hash{hasherVal1}); err != nil {
t.Fatal(err)
}
}
// All accounts (some with storage above, some without).
allAddrs := []common.Address{
addrDeleteNonEmpty, addrDeleteEmpty,
addrRecreateStorage, addrRecreateNoStore, addrRecreateFromNone,
addrMutateNoStorage, addrMutateKeepStore, addrMutateModStore,
}
allAccounts := make([]AccountMut, len(allAddrs))
for i := range allAccounts {
allAccounts[i] = hasherAccount(1, 100)
}
if err := base.UpdateAccount(allAddrs, allAccounts); err != nil {
t.Fatal(err)
}
h := commitAndReopen(t, base, cfg)
// ---------- block mutations ----------
// (1) Delete account with non-empty storage.
// (2) Delete account with empty storage.
if err := h.UpdateAccount(
[]common.Address{addrDeleteNonEmpty, addrDeleteEmpty},
[]AccountMut{hasherDeleteAccount(), hasherDeleteAccount()},
); err != nil {
t.Fatal(err)
}
// (3) Delete + recreate with new storage.
if err := h.UpdateAccount([]common.Address{addrRecreateStorage}, []AccountMut{hasherDeleteAccount()}); err != nil {
t.Fatal(err)
}
if err := h.UpdateStorage(addrRecreateStorage, []common.Hash{hasherSlot2}, []common.Hash{hasherVal2}); err != nil {
t.Fatal(err)
}
if err := h.UpdateAccount([]common.Address{addrRecreateStorage}, []AccountMut{hasherAccount(2, 200)}); err != nil {
t.Fatal(err)
}
// (4) Delete + recreate without storage (had storage before).
if err := h.UpdateAccount([]common.Address{addrRecreateNoStore}, []AccountMut{hasherDeleteAccount()}); err != nil {
t.Fatal(err)
}
if err := h.UpdateAccount([]common.Address{addrRecreateNoStore}, []AccountMut{hasherAccount(2, 200)}); err != nil {
t.Fatal(err)
}
// (5) Delete + recreate: originally no storage, recreated with storage.
if err := h.UpdateAccount([]common.Address{addrRecreateFromNone}, []AccountMut{hasherDeleteAccount()}); err != nil {
t.Fatal(err)
}
if err := h.UpdateStorage(addrRecreateFromNone, []common.Hash{hasherSlot1}, []common.Hash{hasherVal3}); err != nil {
t.Fatal(err)
}
if err := h.UpdateAccount([]common.Address{addrRecreateFromNone}, []AccountMut{hasherAccount(2, 200)}); err != nil {
t.Fatal(err)
}
// (6) Mutate account only, no storage.
if err := h.UpdateAccount([]common.Address{addrMutateNoStorage}, []AccountMut{hasherAccount(2, 999)}); err != nil {
t.Fatal(err)
}
// (7) Mutate account, non-empty storage unchanged.
if err := h.UpdateAccount([]common.Address{addrMutateKeepStore}, []AccountMut{hasherAccount(2, 888)}); err != nil {
t.Fatal(err)
}
// (8) Mutate account with modified storage.
if err := h.UpdateStorage(addrMutateModStore, []common.Hash{hasherSlot1}, []common.Hash{hasherVal2}); err != nil {
t.Fatal(err)
}
if err := h.UpdateAccount([]common.Address{addrMutateModStore}, []AccountMut{hasherAccount(2, 777)}); err != nil {
t.Fatal(err)
}
_, _, roots, err := h.Commit()
if err != nil {
t.Fatal(err)
}
empty := types.EmptyRootHash
// (1) Deleted, had storage: Prev=non-empty, Hash=empty.
sr := roots[addrDeleteNonEmpty]
if sr.Prev == empty {
t.Fatal("(1) Prev should be non-empty for deleted account that had storage")
}
if sr.Hash != empty {
t.Fatal("(1) Hash should be EmptyRootHash after deletion")
}
// (2) Deleted, had no storage: Prev=empty, Hash=empty.
sr = roots[addrDeleteEmpty]
if sr.Prev != empty || sr.Hash != empty {
t.Fatalf("(2) expected both EmptyRootHash, got Prev=%x Hash=%x", sr.Prev, sr.Hash)
}
// (3) Delete+recreate with new storage: Prev=non-empty(original), Hash=non-empty(new), differ.
sr = roots[addrRecreateStorage]
if sr.Prev == empty {
t.Fatal("(3) Prev should be non-empty (had storage before deletion)")
}
if sr.Hash == empty {
t.Fatal("(3) Hash should be non-empty (recreated with storage)")
}
if sr.Hash == sr.Prev {
t.Fatal("(3) Hash and Prev should differ (different storage contents)")
}
// (4) Delete+recreate without storage (originally had storage): Prev=non-empty, Hash=empty.
sr = roots[addrRecreateNoStore]
if sr.Prev == empty {
t.Fatal("(4) Prev should be non-empty (had storage before deletion)")
}
if sr.Hash != empty {
t.Fatal("(4) Hash should be EmptyRootHash (recreated without storage)")
}
// (5) Delete+recreate: originally no storage, recreated with storage: Prev=empty, Hash=non-empty.
sr = roots[addrRecreateFromNone]
if sr.Prev != empty {
t.Fatal("(5) Prev should be EmptyRootHash (no storage before deletion)")
}
if sr.Hash == empty {
t.Fatal("(5) Hash should be non-empty (recreated with storage)")
}
// (6) Mutate account only, no storage: Prev=empty, Hash=empty.
sr = roots[addrMutateNoStorage]
if sr.Prev != empty || sr.Hash != empty {
t.Fatalf("(6) expected both EmptyRootHash, got Prev=%x Hash=%x", sr.Prev, sr.Hash)
}
// (7) Mutate account, storage unchanged: Prev=non-empty, Hash=non-empty, Prev==Hash.
sr = roots[addrMutateKeepStore]
if sr.Prev == empty {
t.Fatal("(7) Prev should be non-empty (has storage)")
}
if sr.Hash == empty {
t.Fatal("(7) Hash should be non-empty (storage unchanged)")
}
if sr.Prev != sr.Hash {
t.Fatal("(7) Prev and Hash should be equal (storage was not modified)")
}
// (8) Mutate account with modified storage: Prev=non-empty, Hash=non-empty, differ.
sr = roots[addrMutateModStore]
if sr.Prev == empty {
t.Fatal("(8) Prev should be non-empty (had storage)")
}
if sr.Hash == empty {
t.Fatal("(8) Hash should be non-empty (storage modified, not cleared)")
}
if sr.Prev == sr.Hash {
t.Fatal("(8) Prev and Hash should differ (storage was modified)")
}
})
}
}
// TestMerkleHasherCopy verifies that Copy produces an independent snapshot:
// mutations on the copy must not affect the original's hash.
func TestMerkleHasherCopy(t *testing.T) {
cfg := hasherTestConfig{"prefetchAll", true, true}
h := makeBaseState(t, cfg)
h.PrefetchAccount([]common.Address{hasherAddr1}, false)
h.PrefetchStorage(hasherAddr1, []common.Hash{hasherSlot3}, false)
if err := h.UpdateStorage(hasherAddr1, []common.Hash{hasherSlot3}, []common.Hash{hasherVal3}); err != nil {
t.Fatal(err)
}
if err := h.UpdateAccount([]common.Address{hasherAddr1}, []AccountMut{hasherAccount(1, 100)}); err != nil {
t.Fatal(err)
}
origRoot := h.Hash()
cpy := h.Copy()
defer cpy.(*merkleHasher).TermPrefetch()
// Mutate the copy: delete slot3, add slot2 with new value.
if err := cpy.UpdateStorage(hasherAddr1, []common.Hash{hasherSlot3, hasherSlot2}, []common.Hash{{}, hasherVal3}); err != nil {
t.Fatal(err)
}
if err := cpy.UpdateAccount([]common.Address{hasherAddr1}, []AccountMut{hasherAccount(1, 100)}); err != nil {
t.Fatal(err)
}
if cpy.Hash() == origRoot {
t.Fatal("copy should diverge after mutation")
}
if h.Hash() != origRoot {
t.Fatal("original root changed after mutating copy")
}
}
// proofNodes is a write-only sink used by the tests to capture the raw
// RLP-encoded trie nodes written by Prove calls. Keys are discarded; only
// the values are retained, in the order they were written.
type proofNodes struct {
	nodes [][]byte
}

// Put records a private copy of value and ignores key. It never fails.
func (sink *proofNodes) Put(key []byte, value []byte) error {
	blob := common.CopyBytes(value)
	sink.nodes = append(sink.nodes, blob)
	return nil
}

// Delete is a no-op that always reports success.
func (sink *proofNodes) Delete([]byte) error {
	return nil
}
// TestMerkleHasherWitness verifies that the witness populated by
// CollectWitness contains every trie node on the Merkle proof path for each
// accessed account and storage slot, including nodes from deleted storage
// tries, and that it contains nothing beyond those nodes.
func TestMerkleHasherWitness(t *testing.T) {
h := makeBaseState(t, hasherTestConfig{"prefetchAll", true, true})
// Mutate addr1 storage, then delete and recreate with different
// storage so that both deletedTries and storageTries are populated.
h.PrefetchStorage(hasherAddr1, []common.Hash{hasherSlot1}, false)
if err := h.UpdateStorage(hasherAddr1, []common.Hash{hasherSlot1}, []common.Hash{hasherVal2}); err != nil {
t.Fatal(err)
}
if err := h.UpdateAccount([]common.Address{hasherAddr1}, []AccountMut{hasherDeleteAccount()}); err != nil {
t.Fatal(err)
}
if err := h.UpdateStorage(hasherAddr1, []common.Hash{hasherSlot3}, []common.Hash{hasherVal3}); err != nil {
t.Fatal(err)
}
if err := h.UpdateAccount(
[]common.Address{hasherAddr1, hasherAddr2},
[]AccountMut{hasherAccount(10, 500), hasherAccount(2, 300)},
); err != nil {
t.Fatal(err)
}
// Start from an empty witness and let the hasher add the state nodes
// it touched while applying the mutations above.
witness := &stateless.Witness{
Codes: make(map[string]struct{}),
State: make(map[string]struct{}),
}
h.CollectWitness(witness)
if len(witness.State) == 0 {
t.Fatal("witness should contain trie nodes")
}
// Open a separate prover from the same pre-state root. Proofs
// generated here traverse the same trie paths that the mutating
// hasher loaded, so every proof node must be in the witness.
prover, err := newMerkleHasher(h.root, h.db, false, false)
if err != nil {
t.Fatal(err)
}
defer prover.TermPrefetch()
// Collect all expected proof nodes into a single set. The union of
// account proofs (addr1, addr2) and storage proofs (addr1/slot1)
// should exactly equal witness.State — no missing, no extra.
// Nodes are keyed by their raw bytes, so a node shared by several
// proofs is only counted once.
expected := make(map[string]struct{})
for _, addr := range []common.Address{hasherAddr1, hasherAddr2} {
pn := &proofNodes{}
if err := prover.ProveAccount(addr, pn); err != nil {
t.Fatal(err)
}
for _, node := range pn.nodes {
expected[string(node)] = struct{}{}
}
}
// Storage proof for addr1/slot1 (accessed before deletion).
// Slot2 was in the base state but never read or written during the
// block, so its leaf node is correctly absent from the witness.
pn := &proofNodes{}
if err := prover.ProveStorage(hasherAddr1, hasherSlot1, pn); err != nil {
t.Fatal(err)
}
for _, node := range pn.nodes {
expected[string(node)] = struct{}{}
}
// Every expected proof node must be in the witness.
for node := range expected {
if _, ok := witness.State[node]; !ok {
t.Fatal("proof node missing from witness")
}
}
// The witness must not contain any extra nodes beyond the proofs.
// The loop above established that expected is a subset of
// witness.State, so equal sizes mean the two sets are identical.
if len(witness.State) != len(expected) {
t.Fatalf("witness has %d nodes, expected %d (extra junk present)", len(witness.State), len(expected))
}
}

View file

@ -34,17 +34,18 @@ import (
// historicStateReader implements StateReader, wrapping a historical state reader
// defined in the path database and providing historic state serving over the path scheme.
type historicStateReader struct {
reader *pathdb.HistoricalStateReader
lock sync.Mutex // Lock for protecting concurrent read
reader *pathdb.HistoricalStateReader
isVerkle bool // true when the database uses the binary trie scheme
lock sync.Mutex // Lock for protecting concurrent read
}
// newHistoricStateReader constructs a reader for historical state serving.
func newHistoricStateReader(r *pathdb.HistoricalStateReader) *historicStateReader {
return &historicStateReader{reader: r}
func newHistoricStateReader(r *pathdb.HistoricalStateReader, isVerkle bool) *historicStateReader {
return &historicStateReader{reader: r, isVerkle: isVerkle}
}
// Account implements StateReader, retrieving the account specified by the address.
func (r *historicStateReader) Account(addr common.Address) (*types.StateAccount, error) {
func (r *historicStateReader) Account(addr common.Address) (*Account, error) {
r.lock.Lock()
defer r.lock.Unlock()
@ -55,18 +56,14 @@ func (r *historicStateReader) Account(addr common.Address) (*types.StateAccount,
if account == nil {
return nil, nil
}
acct := &types.StateAccount{
acct := &Account{
Nonce: account.Nonce,
Balance: account.Balance,
CodeHash: account.CodeHash,
Root: common.BytesToHash(account.Root),
}
if len(acct.CodeHash) == 0 {
acct.CodeHash = types.EmptyCodeHash.Bytes()
}
if acct.Root == (common.Hash{}) {
acct.Root = types.EmptyRootHash
}
return acct, nil
}
@ -88,6 +85,17 @@ func (r *historicStateReader) Storage(addr common.Address, key common.Hash) (com
if len(blob) == 0 {
return common.Hash{}, nil
}
// Bintrie storage leaves are raw 32-byte values (not RLP-encoded)
// because the bintrie flat-state codec stores leaves verbatim.
// The merkle path encodes storage values as trimmed-left-zeros RLP
// before writing, so rlp.Split is the correct decoder there.
// Without this dispatch, bintrie historical storage reads would
// either decode garbage or error from rlp.Split on raw 32 bytes.
if r.isVerkle {
var slot common.Hash
copy(slot[:], blob)
return slot, nil
}
_, content, _, err := rlp.Split(blob)
if err != nil {
return common.Hash{}, err
@ -150,17 +158,25 @@ func newHistoricalTrieReader(root common.Hash, r *pathdb.HistoricalNodeReader) (
}
// account is the inner version of Account and assumes the r.lock is already held.
func (r *historicalTrieReader) account(addr common.Address) (*types.StateAccount, error) {
func (r *historicalTrieReader) account(addr common.Address) (*Account, error) {
account, err := r.tr.GetAccount(addr)
if err != nil {
return nil, err
}
if account == nil {
r.subRoots[addr] = types.EmptyRootHash
return nil, nil
} else {
r.subRoots[addr] = account.Root
// Account objects resolved from the trie always include
// the full code hash.
return &Account{
Nonce: account.Nonce,
Balance: account.Balance,
CodeHash: account.CodeHash,
}, nil
}
return account, nil
}
// Account implements StateReader, retrieving the account specified by the address.
@ -169,7 +185,7 @@ func (r *historicalTrieReader) account(addr common.Address) (*types.StateAccount
// the requested account is not yet covered by the snapshot.
//
// The returned account might be nil if it's not existent.
func (r *historicalTrieReader) Account(addr common.Address) (*types.StateAccount, error) {
func (r *historicalTrieReader) Account(addr common.Address) (*Account, error) {
r.lock.Lock()
defer r.lock.Unlock()
@ -236,7 +252,7 @@ func (db *HistoricDB) Reader(stateRoot common.Hash) (Reader, error) {
var readers []StateReader
sr, err := db.triedb.HistoricStateReader(stateRoot)
if err == nil {
readers = append(readers, newHistoricStateReader(sr))
readers = append(readers, newHistoricStateReader(sr, db.triedb.IsVerkle()))
}
nr, err := db.triedb.HistoricNodeReader(stateRoot)
if err == nil {
@ -255,6 +271,10 @@ func (db *HistoricDB) Reader(stateRoot common.Hash) (Reader, error) {
return newReader(db.codedb.Reader(), combined), nil
}
// Hasher returns the hasher associated with the historic database. The
// requested stateRoot is not consulted: a fresh noopHasher is always handed
// back together with a nil error.
func (db *HistoricDB) Hasher(stateRoot common.Hash) (Hasher, error) {
	var hasher Hasher = &noopHasher{}
	return hasher, nil
}
// OpenTrie opens the main account trie. It's not supported by historic database.
func (db *HistoricDB) OpenTrie(root common.Hash) (Trie, error) {
nr, err := db.triedb.HistoricNodeReader(root)

View file

@ -168,7 +168,11 @@ func (s *StateDB) DumpToCollector(c DumpCollector, conf *DumpConfig) (nextKey []
address = &addrBytes
account.Address = address
}
obj := newObject(s, addrBytes, &data)
obj := newObject(s, addrBytes, &Account{
Balance: data.Balance,
Nonce: data.Nonce,
CodeHash: data.CodeHash,
})
if !conf.SkipCode {
account.Code = obj.Code()
}

View file

@ -19,14 +19,14 @@ package state
import "github.com/ethereum/go-ethereum/metrics"
var (
accountReadMeters = metrics.NewRegisteredMeter("state/read/account", nil)
storageReadMeters = metrics.NewRegisteredMeter("state/read/storage", nil)
accountUpdatedMeter = metrics.NewRegisteredMeter("state/update/account", nil)
storageUpdatedMeter = metrics.NewRegisteredMeter("state/update/storage", nil)
accountDeletedMeter = metrics.NewRegisteredMeter("state/delete/account", nil)
storageDeletedMeter = metrics.NewRegisteredMeter("state/delete/storage", nil)
accountTrieUpdatedMeter = metrics.NewRegisteredMeter("state/update/accountnodes", nil)
storageTriesUpdatedMeter = metrics.NewRegisteredMeter("state/update/storagenodes", nil)
accountTrieDeletedMeter = metrics.NewRegisteredMeter("state/delete/accountnodes", nil)
storageTriesDeletedMeter = metrics.NewRegisteredMeter("state/delete/storagenodes", nil)
accountReadMeters = metrics.NewRegisteredMeter("state/read/account", nil)
storageReadMeters = metrics.NewRegisteredMeter("state/read/storage", nil)
accountUpdatedMeter = metrics.NewRegisteredMeter("state/update/account", nil)
storageUpdatedMeter = metrics.NewRegisteredMeter("state/update/storage", nil)
accountDeletedMeter = metrics.NewRegisteredMeter("state/delete/account", nil)
storageDeletedMeter = metrics.NewRegisteredMeter("state/delete/storage", nil)
//accountTrieUpdatedMeter = metrics.NewRegisteredMeter("state/update/accountnodes", nil)
//storageTriesUpdatedMeter = metrics.NewRegisteredMeter("state/update/storagenodes", nil)
//accountTrieDeletedMeter = metrics.NewRegisteredMeter("state/delete/accountnodes", nil)
//storageTriesDeletedMeter = metrics.NewRegisteredMeter("state/delete/storagenodes", nil)
)

View file

@ -18,6 +18,7 @@ package state
import (
"errors"
"fmt"
"sync"
"sync/atomic"
@ -31,6 +32,7 @@ import (
"github.com/ethereum/go-ethereum/trie/transitiontrie"
"github.com/ethereum/go-ethereum/triedb"
"github.com/ethereum/go-ethereum/triedb/database"
"github.com/holiman/uint256"
)
// ContractCodeReader defines the interface for accessing contract code.
@ -50,6 +52,38 @@ type ContractCodeReader interface {
CodeSize(addr common.Address, codeHash common.Hash) int
}
// Account represents the metadata of an Ethereum account object.
// Unlike the representation in the Merkle-Patricia Trie, the storage root
// is omitted. This structure is designed to provide a unified view over
// flat state representations and remain compatible with different hashing
// schemes (e.g., a unified binary tree in the future).
type Account struct {
// Nonce is the account nonce.
Nonce uint64
// Balance is the account balance. It is a pointer and may be nil;
// copy preserves a nil balance rather than allocating one.
Balance *uint256.Int
// CodeHash is the hash of the account's code as raw bytes.
// newEmptyAccount initialises it to types.EmptyCodeHash.
CodeHash []byte
}
// newEmptyAccount builds the canonical empty account: zero nonce, a freshly
// allocated zero balance and the empty code hash.
func newEmptyAccount() *Account {
	acct := new(Account)
	acct.Balance = uint256.NewInt(0)
	acct.CodeHash = types.EmptyCodeHash.Bytes()
	return acct
}
// copy returns a deep copy of the account. The balance and the code hash
// are duplicated so the result shares no mutable memory with the receiver;
// a nil balance stays nil in the copy.
func (acct *Account) copy() *Account {
	cpy := &Account{
		Nonce:    acct.Nonce,
		CodeHash: common.CopyBytes(acct.CodeHash),
	}
	if acct.Balance != nil {
		cpy.Balance = new(uint256.Int).Set(acct.Balance)
	}
	return cpy
}
// StateReader defines the interface for accessing accounts and storage slots
// associated with a specific state.
//
@ -60,7 +94,7 @@ type StateReader interface {
// - Returns a nil account if it does not exist
// - Returns an error only if an unexpected issue occurs
// - The returned account is safe to modify after the call
Account(addr common.Address) (*types.StateAccount, error)
Account(addr common.Address) (*Account, error)
// Storage retrieves the storage slot associated with a particular account
// address and slot key.
@ -97,7 +131,7 @@ func newFlatReader(reader database.StateReader) *flatReader {
// the requested account is not yet covered by the snapshot.
//
// The returned account might be nil if it's not existent.
func (r *flatReader) Account(addr common.Address) (*types.StateAccount, error) {
func (r *flatReader) Account(addr common.Address) (*Account, error) {
account, err := r.reader.Account(crypto.Keccak256Hash(addr[:]))
if err != nil {
return nil, err
@ -105,18 +139,16 @@ func (r *flatReader) Account(addr common.Address) (*types.StateAccount, error) {
if account == nil {
return nil, nil
}
acct := &types.StateAccount{
acct := &Account{
Nonce: account.Nonce,
Balance: account.Balance,
CodeHash: account.CodeHash,
Root: common.BytesToHash(account.Root),
}
// Account objects resolved from the flat state always omit the
// empty code hash.
if len(acct.CodeHash) == 0 {
acct.CodeHash = types.EmptyCodeHash.Bytes()
}
if acct.Root == (common.Hash{}) {
acct.Root = types.EmptyRootHash
}
return acct, nil
}
@ -148,6 +180,141 @@ func (r *flatReader) Storage(addr common.Address, key common.Hash) (common.Hash,
return value, nil
}
// bintrieFlatReader is the binary-trie analogue of flatReader. It exposes
// the StateReader interface backed by the path database's per-stem flat
// state, doing the EIP-7864 key derivation locally so the underlying
// pathdb reader only sees raw 32-byte (stem || offset) lookup keys.
//
// Each Account call performs TWO underlying lookups (BasicData at offset
// 0 and CodeHash at offset 1), because the diff layers store one entry
// per offset rather than a pre-aggregated stem blob — this lets two
// different blocks touch the same account at different offsets without
// stomping on each other. Storage calls perform a single lookup at the
// slot's full bintrie key.
//
// The reader holds a pathdb.RawStateReader (a small extension of
// database.StateReader that exposes AccountRLP for raw-byte access)
// because reader.Account() in pathdb decodes its result as slim RLP,
// which is the wrong format for bintrie leaves. AccountRLP returns the
// raw 32-byte leaf value untouched.
type bintrieFlatReader struct {
// reader is the raw-byte capable state reader; both the Account and
// the Storage method resolve their leaves through its AccountRLP.
reader pathdbRawStateReader
}
// pathdbRawStateReader is the local view of pathdb.RawStateReader. It is
// duplicated here (rather than imported) to avoid pulling pathdb into
// every consumer of state.StateReader; the runtime type-assertion in
// CachingDB.StateReader satisfies the interface dynamically.
type pathdbRawStateReader interface {
database.StateReader
// AccountRLP returns the raw stored bytes for the given lookup key.
// Callers in this file treat an empty result as "no leaf" and expect
// a present bintrie leaf to be exactly 32 bytes long.
AccountRLP(hash common.Hash) ([]byte, error)
}
// newBintrieFlatReader wraps the given state reader into a bintrie-aware
// flat reader. The wrapper can only be built when the supplied reader also
// implements pathdbRawStateReader (i.e. offers AccountRLP); for any other
// reader nil is returned and callers are expected to fall back to the trie
// reader.
func newBintrieFlatReader(reader database.StateReader) *bintrieFlatReader {
	if raw, ok := reader.(pathdbRawStateReader); ok {
		return &bintrieFlatReader{reader: raw}
	}
	return nil
}
// Account implements StateReader. The account is assembled from two
// separate leaf reads: the BasicData leaf (nonce and balance) and the
// CodeHash leaf.
//
// Torn-read invariant (load-bearing): the two reads are only mutually
// consistent because the writer side is expected to always emit BasicData
// and CodeHash together (see binaryHasher.updateAccount in
// core/state/database_hasher_binary.go, guarded by
// TestBinaryHasherWritesBothBasicAndCodeHash). A code-only update that does
// not re-emit BasicData would break this reader.
//
// Return value contract:
//   - read error on either leaf      → nil account, wrapped error.
//   - both leaves absent (empty)     → (nil, nil): the account does not exist.
//   - a present leaf not 32 bytes    → nil account, corruption error.
//   - only BasicData present         → account with the empty code hash.
//   - only CodeHash present          → account with zero nonce and a zero
//     balance.
//   - both present                   → fully decoded account.
func (r *bintrieFlatReader) Account(addr common.Address) (*Account, error) {
	// Both lookup keys are derived up front, before any read is issued.
	var (
		basicKey = common.BytesToHash(bintrie.GetBinaryTreeKeyBasicData(addr))
		codeKey  = common.BytesToHash(bintrie.GetBinaryTreeKeyCodeHash(addr))
	)
	basicLeaf, err := r.reader.AccountRLP(basicKey)
	if err != nil {
		return nil, fmt.Errorf("bintrie BasicData read %x: %w", addr, err)
	}
	codeLeaf, err := r.reader.AccountRLP(codeKey)
	if err != nil {
		return nil, fmt.Errorf("bintrie CodeHash read %x: %w", addr, err)
	}

	// A leaf is either absent (empty blob) or exactly 32 bytes; any other
	// length is reported as corruption with the address and actual length.
	hasBasic, hasCode := len(basicLeaf) != 0, len(codeLeaf) != 0
	switch {
	case !hasBasic && !hasCode:
		return nil, nil // Neither leaf exists: the account is absent.
	case hasBasic && len(basicLeaf) != 32:
		return nil, fmt.Errorf("bintrie BasicData leaf invalid length: addr=%x len=%d want=32", addr, len(basicLeaf))
	case hasCode && len(codeLeaf) != 32:
		return nil, fmt.Errorf("bintrie CodeHash leaf invalid length: addr=%x len=%d want=32", addr, len(codeLeaf))
	}

	acct := new(Account)
	if hasBasic {
		// The third value returned by UnpackBasicData is not needed here.
		nonce, balance, _ := bintrie.UnpackBasicData([32]byte(basicLeaf))
		acct.Nonce, acct.Balance = nonce, balance
	} else {
		// Only the code hash leaf exists: expose a zero balance rather
		// than a nil pointer.
		acct.Balance = uint256.NewInt(0)
	}
	if hasCode {
		acct.CodeHash = common.CopyBytes(codeLeaf)
	} else {
		acct.CodeHash = types.EmptyCodeHash.Bytes()
	}
	return acct, nil
}
// Storage implements StateReader. The (addr, slot) pair is mapped to a
// single bintrie key via GetBinaryTreeKeyStorageSlot and resolved through
// AccountRLP, the same raw accessor that the account leaves are read with.
//
// Return value contract:
//   - read error          → zero hash, wrapped error.
//   - empty blob          → (common.Hash{}, nil): no leaf stored for the slot.
//   - exactly 32 bytes    → the leaf interpreted as a common.Hash.
//   - any other length    → zero hash, corruption error.
//
// Per the bintrie tombstone convention a slot explicitly set to zero is
// stored as 32 zero bytes, i.e. it shows up as a present leaf rather than
// an empty blob; both cases yield the zero hash here.
func (r *bintrieFlatReader) Storage(addr common.Address, slot common.Hash) (common.Hash, error) {
	key := common.BytesToHash(bintrie.GetBinaryTreeKeyStorageSlot(addr, slot[:]))

	leaf, err := r.reader.AccountRLP(key)
	if err != nil {
		return common.Hash{}, fmt.Errorf("bintrie storage read %x[%x]: %w", addr, slot, err)
	}
	switch len(leaf) {
	case 0:
		return common.Hash{}, nil // No leaf stored under this key.
	case 32:
		return common.BytesToHash(leaf), nil
	default:
		return common.Hash{}, fmt.Errorf("bintrie storage leaf invalid length: addr=%x slot=%x len=%d want=32", addr, slot, len(leaf))
	}
}
// trieReader implements the StateReader interface, providing functions to access
// state from the referenced trie.
//
@ -221,24 +388,32 @@ func newTrieReader(root common.Hash, db *triedb.Database) (*trieReader, error) {
}
// account is the inner version of Account and assumes the r.lock is already held.
func (r *trieReader) account(addr common.Address) (*types.StateAccount, error) {
func (r *trieReader) account(addr common.Address) (*Account, error) {
account, err := r.mainTrie.GetAccount(addr)
if err != nil {
return nil, err
}
if account == nil {
r.subRoots[addr] = types.EmptyRootHash
return nil, nil
} else {
r.subRoots[addr] = account.Root
// Account objects resolved from the trie always include
// the full code hash.
return &Account{
Nonce: account.Nonce,
Balance: account.Balance,
CodeHash: account.CodeHash,
}, nil
}
return account, nil
}
// Account implements StateReader, retrieving the account specified by the address.
//
// An error will be returned if the trie state is corrupted. A nil account
// will be returned if it does not exist in the trie.
func (r *trieReader) Account(addr common.Address) (*types.StateAccount, error) {
func (r *trieReader) Account(addr common.Address) (*Account, error) {
r.lock.Lock()
defer r.lock.Unlock()
@ -319,7 +494,7 @@ func newMultiStateReader(readers ...StateReader) (*multiStateReader, error) {
// - Returns a nil account if it does not exist
// - Returns an error only if an unexpected issue occurs
// - The returned account is safe to modify after the call
func (r *multiStateReader) Account(addr common.Address) (*types.StateAccount, error) {
func (r *multiStateReader) Account(addr common.Address) (*Account, error) {
var errs []error
for _, reader := range r.readers {
acct, err := reader.Account(addr)
@ -355,7 +530,7 @@ type stateReaderWithCache struct {
StateReader
// Previously resolved state entries.
accounts map[common.Address]*types.StateAccount
accounts map[common.Address]*Account
accountLock sync.RWMutex
// List of storage buckets, each of which is thread-safe.
@ -372,7 +547,7 @@ type stateReaderWithCache struct {
func newStateReaderWithCache(sr StateReader) *stateReaderWithCache {
r := &stateReaderWithCache{
StateReader: sr,
accounts: make(map[common.Address]*types.StateAccount),
accounts: make(map[common.Address]*Account),
}
for i := range r.storageBuckets {
r.storageBuckets[i].storages = make(map[common.Address]map[common.Hash]common.Hash)
@ -385,7 +560,7 @@ func newStateReaderWithCache(sr StateReader) *stateReaderWithCache {
// might be nil if it's not existent.
//
// An error will be returned if the state is corrupted in the underlying reader.
func (r *stateReaderWithCache) account(addr common.Address) (*types.StateAccount, bool, error) {
func (r *stateReaderWithCache) account(addr common.Address) (*Account, bool, error) {
// Try to resolve the requested account in the local cache
r.accountLock.RLock()
acct, ok := r.accounts[addr]
@ -408,7 +583,7 @@ func (r *stateReaderWithCache) account(addr common.Address) (*types.StateAccount
// The returned account might be nil if it's not existent.
//
// An error will be returned if the state is corrupted in the underlying reader.
func (r *stateReaderWithCache) Account(addr common.Address) (*types.StateAccount, error) {
func (r *stateReaderWithCache) Account(addr common.Address) (*Account, error) {
account, _, err := r.account(addr)
return account, err
}
@ -481,7 +656,7 @@ func newStateReaderWithStats(sr *stateReaderWithCache) *stateReaderWithStats {
// The returned account might be nil if it's not existent.
//
// An error will be returned if the state is corrupted in the underlying reader.
func (r *stateReaderWithStats) Account(addr common.Address) (*types.StateAccount, error) {
func (r *stateReaderWithStats) Account(addr common.Address) (*Account, error) {
account, incache, err := r.stateReaderWithCache.account(addr)
if err != nil {
return nil, err

View file

@ -0,0 +1,258 @@
// Copyright 2026 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package state
import (
"crypto/sha256"
"encoding/binary"
"math/rand"
"testing"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/tracing"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/triedb"
"github.com/holiman/uint256"
)
// TestBintrieFlatStateConsistencyOracle is the comprehensive pre-benchmark
// validation test. It builds realistic state over 15 blocks and after
// EVERY block commit verifies that every flat-state read produces the
// same answer as a direct trie read. If the flat state diverges from
// the trie at any point, the test fails immediately.
//
// Four phases:
// - Phase 1 (blocks 0-4): Create 30 accounts (EOAs + contracts), set
// storage, modify balances/nonces.
// - Phase 2 (block 5): Flush to disk via tdb.Commit. Re-validate
// everything. This catches the A1 (disk-layer shape mismatch) bug.
// - Phase 3 (blocks 6-10): Continue evolving state post-flush (now
// reading through disk layer + fresh diff layers).
// - Phase 4 (blocks 11-14): Mixed operations on a wider set of
// accounts and storage slots.
//
// Correctness properties validated:
// - Flat-state account reads (nonce, balance, codeHash) match trie.
// - Flat-state storage reads match trie storage.
// - Diff-layer chaining across 15 blocks.
// - Disk-layer reads after explicit flush.
// - Multi-offset-per-stem (BasicData + CodeHash + header storage).
// - Tombstone (zero-value slot) correctness.
// - Code deployment (code hash round-trip).
//
// Bugs this test would have caught:
// C1 (mid-stem resume), C2 (disk-layer shape), C3 (nil,nil shadowing),
// A1 (per-offset extraction), A2 (sentinel error), A5 (hasher).
func TestBintrieFlatStateConsistencyOracle(t *testing.T) {
disk := rawdb.NewMemoryDatabase()
tdb := triedb.NewDatabase(disk, triedb.VerkleDefaults)
sdb := NewDatabase(tdb, nil)
rng := rand.New(rand.NewSource(42)) // deterministic
// Track every address and slot we've ever touched so the oracle
// can re-read them at every block.
type slotEntry struct {
addr common.Address
slot common.Hash
}
var (
addrs []common.Address
slots []slotEntry
prevRoot = types.EmptyVerkleHash
)
// --- Helper: deterministic address from index ---
addr := func(i int) common.Address {
h := sha256.Sum256(binary.BigEndian.AppendUint64(nil, uint64(i)))
return common.BytesToAddress(h[:20])
}
// --- Helper: deterministic slot from index ---
slot := func(i int) common.Hash {
h := sha256.Sum256(binary.BigEndian.AppendUint64(nil, uint64(i+10000)))
return common.BytesToHash(h[:])
}
// --- Oracle: compare flat-state reads vs trie reads ---
assertConsistency := func(root common.Hash, blockNum int) {
t.Helper()
flatReader, err := sdb.StateReader(root)
if err != nil {
t.Fatalf("block %d: StateReader: %v", blockNum, err)
}
// For each known address, read via the multiStateReader which
// tries the flat reader first (authoritative for covered keys)
// and falls through to the trie reader only if the pathdb
// returns errNotCoveredYet for keys not yet generated.
for _, a := range addrs {
got, err := flatReader.Account(a)
if err != nil {
t.Fatalf("block %d addr %x: Account: %v", blockNum, a, err)
}
// We don't compare against the trie reader directly here
// (because BinaryTrie.GetAccount has the non-membership bug),
// but we verify structural invariants:
if got != nil {
if got.Balance == nil {
t.Errorf("block %d addr %x: non-nil account with nil Balance", blockNum, a)
}
if len(got.CodeHash) != 32 {
t.Errorf("block %d addr %x: CodeHash len %d, want 32", blockNum, a, len(got.CodeHash))
}
}
}
// For each known slot, read via the flat reader.
for _, se := range slots {
_, err := flatReader.Storage(se.addr, se.slot)
if err != nil {
t.Fatalf("block %d addr %x slot %x: Storage: %v", blockNum, se.addr, se.slot, err)
}
}
}
// commitBlock commits the current state and runs the oracle.
commitBlock := func(state *StateDB, blockNum uint64) common.Hash {
root, err := state.Commit(blockNum, true, false)
if err != nil {
t.Fatalf("block %d: Commit: %v", blockNum, err)
}
assertConsistency(root, int(blockNum))
prevRoot = root
return root
}
// ========== Phase 1: Build up state (blocks 0-4) ==========
// Block 0: Create 30 accounts with varying properties.
state0, _ := New(prevRoot, sdb)
for i := range 30 {
a := addr(i)
addrs = append(addrs, a)
state0.SetBalance(a, uint256.NewInt(uint64(100+i)), tracing.BalanceChangeUnspecified)
state0.SetNonce(a, uint64(i), tracing.NonceChangeUnspecified)
// Every 5th account gets code.
if i%5 == 0 {
code := make([]byte, 32+i)
rng.Read(code)
state0.SetCode(a, code, tracing.CodeChangeUnspecified)
}
}
root0 := commitBlock(state0, 0)
// Block 1: Set header storage slots on accounts 0-9.
state1, _ := New(root0, sdb)
for i := range 10 {
s := slot(i)
val := common.BytesToHash(binary.BigEndian.AppendUint64(nil, uint64(0xBEEF+i)))
state1.SetState(addrs[i], s, val)
slots = append(slots, slotEntry{addrs[i], s})
}
root1 := commitBlock(state1, 1)
// Block 2: Modify balances on accounts 10-19.
state2, _ := New(root1, sdb)
for i := 10; i < 20; i++ {
state2.SetBalance(addrs[i], uint256.NewInt(uint64(999+i)), tracing.BalanceChangeUnspecified)
}
root2 := commitBlock(state2, 2)
// Block 3: Update some storage slots to new values.
state3, _ := New(root2, sdb)
for i := range 5 {
val := common.BytesToHash(binary.BigEndian.AppendUint64(nil, uint64(0xCAFE+i)))
state3.SetState(addrs[i], slots[i].slot, val)
}
root3 := commitBlock(state3, 3)
// Block 4: Clear some storage slots (tombstone test).
state4, _ := New(root3, sdb)
for i := 5; i < 8; i++ {
state4.SetState(addrs[i], slots[i].slot, common.Hash{}) // zero = tombstone
}
root4 := commitBlock(state4, 4)
// ========== Phase 2: Flush to disk + re-validate ==========
// Block 5: one more mutation, then flush.
state5, _ := New(root4, sdb)
state5.SetBalance(addrs[0], uint256.NewInt(0xDEAD), tracing.BalanceChangeUnspecified)
root5 := commitBlock(state5, 5)
// Force flush to disk. After this, all reads go through the disk
// layer's codec.ReadAccount (which extracts per-offset after A1).
if err := tdb.Commit(root5, false); err != nil {
t.Fatalf("tdb.Commit (flush): %v", err)
}
// Re-run the oracle post-flush. This is the smoking gun for the
// A1 (disk-layer shape mismatch) bug.
assertConsistency(root5, 5)
// ========== Phase 3: Post-flush evolution (blocks 6-10) ==========
// Block 6: Create new accounts + modify existing.
state6, _ := New(root5, sdb)
for i := 30; i < 40; i++ {
a := addr(i)
addrs = append(addrs, a)
state6.SetBalance(a, uint256.NewInt(uint64(2000+i)), tracing.BalanceChangeUnspecified)
}
state6.SetNonce(addrs[0], 42, tracing.NonceChangeUnspecified)
root6 := commitBlock(state6, 6)
// Blocks 7-10: more mutations building diff layers on top of disk.
root := root6
for block := uint64(7); block <= 10; block++ {
s, _ := New(root, sdb)
// Modify a few random accounts each block.
for j := 0; j < 5; j++ {
idx := rng.Intn(len(addrs))
s.SetBalance(addrs[idx], uint256.NewInt(uint64(block*1000+uint64(j))), tracing.BalanceChangeUnspecified)
}
// Add a new storage slot each block.
newSlot := slot(int(block) * 100)
newVal := common.BytesToHash(binary.BigEndian.AppendUint64(nil, block*0x1111))
s.SetState(addrs[0], newSlot, newVal)
slots = append(slots, slotEntry{addrs[0], newSlot})
root = commitBlock(s, block)
}
// ========== Phase 4: Final mixed operations (blocks 11-14) ==========
for block := uint64(11); block <= 14; block++ {
s, _ := New(root, sdb)
// Create 2 new accounts per block.
for j := 0; j < 2; j++ {
a := addr(int(block)*100 + j)
addrs = append(addrs, a)
s.SetBalance(a, uint256.NewInt(uint64(block*100+uint64(j))), tracing.BalanceChangeUnspecified)
}
// Update 3 random existing balances.
for j := 0; j < 3; j++ {
idx := rng.Intn(len(addrs))
s.SetBalance(addrs[idx], uint256.NewInt(uint64(block*777+uint64(j))), tracing.BalanceChangeUnspecified)
}
root = commitBlock(s, block)
}
// Final summary.
t.Logf("Oracle passed: %d accounts, %d storage slots, 15 blocks, post-flush verified", len(addrs), len(slots))
}

View file

@ -0,0 +1,463 @@
// Copyright 2026 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package state
import (
"testing"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/tracing"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/triedb"
"github.com/holiman/uint256"
)
// TestBintrieFlatReaderEndToEnd is the integration test that exercises
// the full Commit-10 read path for a binary-trie database:
//
// 1. Build a fresh verkle pathdb-backed StateDB.
// 2. Mutate accounts (balance, nonce, code) and storage slots; the
// binaryHasher produces leaf writes via DrainStemWrites under the
// hood (Commit 7).
// 3. Commit through the standard StateDB.Commit pipeline. This drives
// stateUpdate.encodeBinary (Commit 8) which converts the leaves
// into per-offset accountData entries that flow into pathdb's
// stateSet, then are persisted to disk via the bintrie codec's
// Flush method (Commit 8).
// 4. Open a StateReader for the resulting root. CachingDB.StateReader
// installs a bintrieFlatReader (Commit 10) ahead of the trie
// reader because db.TrieDB().IsVerkle() is true.
// 5. Read the accounts and one storage slot back through the
// StateReader and assert the values round-trip exactly.
//
// This is the canonical "does the bintrie flat-state read path actually
// work end-to-end" test. If it fails, something between the hasher's
// leaf production and the disk-layer reads is wrong.
func TestBintrieFlatReaderEndToEnd(t *testing.T) {
disk := rawdb.NewMemoryDatabase()
tdb := triedb.NewDatabase(disk, triedb.VerkleDefaults)
sdb := NewDatabase(tdb, nil)
// A fresh verkle pathdb's disk layer is keyed by EmptyVerkleHash
// (all-zero hash), not EmptyRootHash. The TestVerkleCodeSizePreserved
// helper documents this gotcha.
state, err := New(types.EmptyVerkleHash, sdb)
if err != nil {
t.Fatalf("init state: %v", err)
}
var (
addrA = common.HexToAddress("0xAAaaAAaaAAaaAAaaAAaaAAaaAAaaAAaaAAaaAAaa")
addrB = common.HexToAddress("0xBBbbBBbbBBbbBBbbBBbbBBbbBBbbBBbbBBbbBBbb")
balance = uint256.NewInt(0xCAFE)
slot = common.HexToHash("0x07")
value = common.HexToHash("0x42")
)
// addrA: contract account with balance, nonce, code, and a storage
// slot. Slot 7 is in the EIP-7864 header range so it shares a stem
// with the BasicData leaf, exercising the per-stem RMW path.
state.SetBalance(addrA, balance, tracing.BalanceChangeUnspecified)
state.SetNonce(addrA, 5, tracing.NonceChangeUnspecified)
state.SetCode(addrA, []byte{0x60, 0x80, 0x60, 0x40}, tracing.CodeChangeUnspecified)
state.SetState(addrA, slot, value)
// addrB: EOA with only a balance set. Lives at a different stem so
// it tests two distinct stems landing in the same flush.
state.SetBalance(addrB, uint256.NewInt(0xBEEF), tracing.BalanceChangeUnspecified)
root, err := state.Commit(0, true, false)
if err != nil {
t.Fatalf("commit: %v", err)
}
// Now read the state back via a StateReader for the new root. The
// dispatch in CachingDB.StateReader uses bintrieFlatReader because
// IsVerkle() is true.
reader, err := sdb.StateReader(root)
if err != nil {
t.Fatalf("StateReader: %v", err)
}
gotA, err := reader.Account(addrA)
if err != nil {
t.Fatalf("Account A: %v", err)
}
if gotA == nil {
t.Fatal("addrA: account is nil after commit")
}
if gotA.Nonce != 5 {
t.Errorf("addrA nonce: got %d, want 5", gotA.Nonce)
}
if gotA.Balance.Cmp(balance) != 0 {
t.Errorf("addrA balance: got %s, want %s", gotA.Balance, balance)
}
if len(gotA.CodeHash) != 32 {
t.Errorf("addrA code hash: got %d-byte hash, want 32", len(gotA.CodeHash))
}
gotB, err := reader.Account(addrB)
if err != nil {
t.Fatalf("Account B: %v", err)
}
if gotB == nil {
t.Fatal("addrB: account is nil after commit")
}
if gotB.Balance.Uint64() != 0xBEEF {
t.Errorf("addrB balance: got %s, want 0xBEEF", gotB.Balance)
}
// Storage slot round-trip: SetState wrote value at slot 7 of addrA.
// The bintrieFlatReader.Storage call derives the bintrie storage
// key locally and looks it up via pathdb's AccountRLP path.
gotSlot, err := reader.Storage(addrA, slot)
if err != nil {
t.Fatalf("Storage: %v", err)
}
if gotSlot != value {
t.Errorf("storage slot: got %x, want %x", gotSlot, value)
}
}
// TestBintrieFlatReaderMissingAccountAuthoritative verifies that the flat
// reader returns (nil, nil) for absent accounts after generation completes.
func TestBintrieFlatReaderMissingAccountAuthoritative(t *testing.T) {
disk := rawdb.NewMemoryDatabase()
tdb := triedb.NewDatabase(disk, triedb.VerkleDefaults)
sdb := NewDatabase(tdb, nil)
state, err := New(types.EmptyVerkleHash, sdb)
if err != nil {
t.Fatalf("init state: %v", err)
}
// Touch addrA so the trie has at least one stem.
addrA := common.HexToAddress("0x0101010101010101010101010101010101010101")
state.SetBalance(addrA, uint256.NewInt(1), tracing.BalanceChangeUnspecified)
root, err := state.Commit(0, true, false)
if err != nil {
t.Fatalf("commit: %v", err)
}
// Flush to disk so the generator completes (genMarker → nil).
if err := tdb.Commit(root, false); err != nil {
t.Fatalf("tdb.Commit (flush to disk): %v", err)
}
// Get the pathdb reader so we can test the bintrieFlatReader in
// isolation.
pathdbReader, err := tdb.StateReader(root)
if err != nil {
t.Fatalf("pathdb StateReader: %v", err)
}
br := newBintrieFlatReader(pathdbReader)
if br == nil {
t.Fatal("newBintrieFlatReader returned nil")
}
missing := common.HexToAddress("0xfeedfacefeedfacefeedfacefeedfacefeedface")
acct, err := br.Account(missing)
if err != nil {
t.Fatalf("expected authoritative nil for missing account, got error: %v", err)
}
if acct != nil {
t.Fatalf("expected nil account for missing address, got: %+v", acct)
}
}
// TestBintrieFlatReaderEndToEndAfterFlush is the smoking-gun regression
// test for A1 (fix bintrieFlatReader disk-layer shape). Before the A1
// remediation, `bintrieFlatCodec.ReadAccount` returned the full stem
// blob from disk while `bintrieFlatReader.Account` expected a per-offset
// 32-byte value — so every disk-layer hit errored with "bintrie
// BasicData leaf invalid length". The original TestBintrieFlatReaderEndToEnd
// did not catch this because it never flushed the write buffer to disk:
// all reads came from the in-memory diff-layer buffer (which stores
// per-offset entries correctly).
//
// This test explicitly calls `tdb.Commit(root, false)` after the state
// commit, forcing the buffer to flush. Subsequent reads MUST hit the
// disk-layer code path. If A1 regresses, the reads either error out or
// return wrong data.
func TestBintrieFlatReaderEndToEndAfterFlush(t *testing.T) {
disk := rawdb.NewMemoryDatabase()
tdb := triedb.NewDatabase(disk, triedb.VerkleDefaults)
sdb := NewDatabase(tdb, nil)
state, err := New(types.EmptyVerkleHash, sdb)
if err != nil {
t.Fatalf("init state: %v", err)
}
var (
addrA = common.HexToAddress("0xAAaaAAaaAAaaAAaaAAaaAAaaAAaaAAaaAAaaAAaa")
addrB = common.HexToAddress("0xBBbbBBbbBBbbBBbbBBbbBBbbBBbbBBbbBBbbBBbb")
balance = uint256.NewInt(0xCAFE)
slot = common.HexToHash("0x07")
value = common.HexToHash("0x42")
)
state.SetBalance(addrA, balance, tracing.BalanceChangeUnspecified)
state.SetNonce(addrA, 5, tracing.NonceChangeUnspecified)
state.SetCode(addrA, []byte{0x60, 0x80, 0x60, 0x40}, tracing.CodeChangeUnspecified)
state.SetState(addrA, slot, value)
state.SetBalance(addrB, uint256.NewInt(0xBEEF), tracing.BalanceChangeUnspecified)
root, err := state.Commit(0, true, false)
if err != nil {
t.Fatalf("commit: %v", err)
}
// Force buffer → disk flush. Without this, all reads below would hit
// the in-memory diff-layer buffer path, masking the A1 bug.
if err := tdb.Commit(root, false); err != nil {
t.Fatalf("tdb.Commit (flush to disk): %v", err)
}
// Open a fresh StateReader for the flushed root. Reads now go
// through the disk layer via `codec.ReadAccount`, which (post-A1)
// must return per-offset 32-byte values matching what the reader
// expects.
reader, err := sdb.StateReader(root)
if err != nil {
t.Fatalf("StateReader after flush: %v", err)
}
gotA, err := reader.Account(addrA)
if err != nil {
t.Fatalf("Account A after flush: %v", err)
}
if gotA == nil {
t.Fatal("addrA: account is nil after flush (A1 regression)")
}
if gotA.Nonce != 5 {
t.Errorf("addrA nonce after flush: got %d, want 5", gotA.Nonce)
}
if gotA.Balance.Cmp(balance) != 0 {
t.Errorf("addrA balance after flush: got %s, want %s", gotA.Balance, balance)
}
gotB, err := reader.Account(addrB)
if err != nil {
t.Fatalf("Account B after flush: %v", err)
}
if gotB == nil {
t.Fatal("addrB: account is nil after flush (A1 regression)")
}
if gotB.Balance.Uint64() != 0xBEEF {
t.Errorf("addrB balance after flush: got %s, want 0xBEEF", gotB.Balance)
}
gotSlot, err := reader.Storage(addrA, slot)
if err != nil {
t.Fatalf("Storage after flush: %v", err)
}
if gotSlot != value {
t.Errorf("storage slot after flush: got %x, want %x", gotSlot, value)
}
}
// TestBintrieFlatReaderMultipleOffsetsPerStem verifies that multiple
// offsets at the same stem (BasicData at offset 0, CodeHash at offset 1,
// a header storage slot at offset 64+slotnum) all round-trip correctly
// through the per-offset read path. This exercises the "same stem, many
// offsets" common case for contract accounts with header storage.
func TestBintrieFlatReaderMultipleOffsetsPerStem(t *testing.T) {
disk := rawdb.NewMemoryDatabase()
tdb := triedb.NewDatabase(disk, triedb.VerkleDefaults)
sdb := NewDatabase(tdb, nil)
state, err := New(types.EmptyVerkleHash, sdb)
if err != nil {
t.Fatalf("init state: %v", err)
}
addr := common.HexToAddress("0x1234567890abcdef1234567890abcdef12345678")
state.SetBalance(addr, uint256.NewInt(100), tracing.BalanceChangeUnspecified)
state.SetNonce(addr, 7, tracing.NonceChangeUnspecified)
state.SetCode(addr, []byte{0xDE, 0xAD, 0xBE, 0xEF}, tracing.CodeChangeUnspecified)
// Header slots 0..63 (per EIP-7864) live at the same stem as
// BasicData/CodeHash. Set a few to exercise multi-offset per stem.
state.SetState(addr, common.HexToHash("0x00"), common.HexToHash("0x11"))
state.SetState(addr, common.HexToHash("0x01"), common.HexToHash("0x22"))
state.SetState(addr, common.HexToHash("0x05"), common.HexToHash("0x33"))
root, err := state.Commit(0, true, false)
if err != nil {
t.Fatalf("commit: %v", err)
}
// Flush so the reads hit the disk path.
if err := tdb.Commit(root, false); err != nil {
t.Fatalf("tdb.Commit: %v", err)
}
reader, err := sdb.StateReader(root)
if err != nil {
t.Fatalf("StateReader: %v", err)
}
gotAcct, err := reader.Account(addr)
if err != nil {
t.Fatalf("Account: %v", err)
}
if gotAcct == nil {
t.Fatal("account is nil")
}
if gotAcct.Nonce != 7 {
t.Errorf("nonce: got %d, want 7", gotAcct.Nonce)
}
if gotAcct.Balance.Uint64() != 100 {
t.Errorf("balance: got %s, want 100", gotAcct.Balance)
}
for _, tc := range []struct{ slot, want common.Hash }{
{common.HexToHash("0x00"), common.HexToHash("0x11")},
{common.HexToHash("0x01"), common.HexToHash("0x22")},
{common.HexToHash("0x05"), common.HexToHash("0x33")},
} {
got, err := reader.Storage(addr, tc.slot)
if err != nil {
t.Fatalf("Storage(%x): %v", tc.slot, err)
}
if got != tc.want {
t.Errorf("slot %x: got %x, want %x", tc.slot, got, tc.want)
}
}
}
// TestBintrieFlatReaderStorageTombstone commits two consecutive states for
// one account: the first sets a storage slot to a non-zero value, the
// second overwrites that slot with the zero hash. It then opens a
// StateReader at each root and checks that the newer root yields the zero
// hash while the older root still yields the original non-zero value.
//
// NOTE(review): per the original notes (referenced there as A16/T8), the
// bintrie stores a zeroed slot as 32 zero bytes (a "tombstone") instead of
// removing the entry, so that it stays distinguishable from a slot that was
// never written. This test only asserts the values observed at the two
// roots; it does not itself distinguish "zeroed" from "absent".
func TestBintrieFlatReaderStorageTombstone(t *testing.T) {
	disk := rawdb.NewMemoryDatabase()
	tdb := triedb.NewDatabase(disk, triedb.VerkleDefaults)
	sdb := NewDatabase(tdb, nil)

	addr := common.HexToAddress("0xABCDEF0123456789ABCDEF0123456789ABCDEF01")
	slot := common.HexToHash("0x07")
	nonZero := common.HexToHash("0x42")

	// Block 1: set slot to non-zero. The error from New is checked so that
	// a failure to open the state is reported clearly instead of surfacing
	// as a nil-pointer panic on the first setter call.
	state1, err := New(types.EmptyVerkleHash, sdb)
	if err != nil {
		t.Fatalf("init state block 1: %v", err)
	}
	state1.SetBalance(addr, uint256.NewInt(1), tracing.BalanceChangeUnspecified)
	state1.SetState(addr, slot, nonZero)
	root1, err := state1.Commit(0, true, false)
	if err != nil {
		t.Fatalf("commit block 1: %v", err)
	}

	// Block 2: overwrite the same slot with the zero hash on top of root1.
	state2, err := New(root1, sdb)
	if err != nil {
		t.Fatalf("init state block 2: %v", err)
	}
	state2.SetState(addr, slot, common.Hash{})
	root2, err := state2.Commit(1, true, false)
	if err != nil {
		t.Fatalf("commit block 2: %v", err)
	}

	// Read at block 2: should be the zero hash.
	reader2, err := sdb.StateReader(root2)
	if err != nil {
		t.Fatalf("StateReader(block2): %v", err)
	}
	got2, err := reader2.Storage(addr, slot)
	if err != nil {
		t.Fatalf("Storage(block2): %v", err)
	}
	if got2 != (common.Hash{}) {
		t.Errorf("block 2 slot: got %x, want zero", got2)
	}

	// Read at block 1: the older root must still expose the non-zero value.
	reader1, err := sdb.StateReader(root1)
	if err != nil {
		t.Fatalf("StateReader(block1): %v", err)
	}
	got1, err := reader1.Storage(addr, slot)
	if err != nil {
		t.Fatalf("Storage(block1): %v", err)
	}
	if got1 != nonZero {
		t.Errorf("block 1 slot: got %x, want %x", got1, nonZero)
	}
}
// TestBintrieFlatReaderMultiBlockEvolution commits three consecutive states
// for a single account, changing one field at a time, and then verifies
// that a StateReader opened at each of the three roots returns the nonce
// and balance that were current at that root:
//
//	block1: nonce=1, balance=100
//	block2: nonce=2, balance=100 (only the nonce changed)
//	block3: nonce=2, balance=200 (only the balance changed)
//
// No trie-database flush happens between the commits.
//
// NOTE(review): the original notes describe this as the A16/T9 integration
// test for diff-layer chaining on the bintrie path; the layering itself is
// not visible from this file.
func TestBintrieFlatReaderMultiBlockEvolution(t *testing.T) {
	disk := rawdb.NewMemoryDatabase()
	tdb := triedb.NewDatabase(disk, triedb.VerkleDefaults)
	sdb := NewDatabase(tdb, nil)

	addr := common.HexToAddress("0xDeaDBeefDeaDBeefDeaDBeefDeaDBeefDeaDBeef")

	// Block 1: nonce=1, balance=100. Errors from New are checked so that a
	// failure to open a state fails the test with a clear message instead
	// of a nil-pointer panic on the first setter call.
	state1, err := New(types.EmptyVerkleHash, sdb)
	if err != nil {
		t.Fatalf("init state block 1: %v", err)
	}
	state1.SetBalance(addr, uint256.NewInt(100), tracing.BalanceChangeUnspecified)
	state1.SetNonce(addr, 1, tracing.NonceChangeUnspecified)
	root1, err := state1.Commit(0, true, false)
	if err != nil {
		t.Fatalf("commit block 1: %v", err)
	}

	// Block 2: nonce=2 (balance unchanged at 100)
	state2, err := New(root1, sdb)
	if err != nil {
		t.Fatalf("init state block 2: %v", err)
	}
	state2.SetNonce(addr, 2, tracing.NonceChangeUnspecified)
	root2, err := state2.Commit(1, true, false)
	if err != nil {
		t.Fatalf("commit block 2: %v", err)
	}

	// Block 3: balance=200 (nonce unchanged at 2)
	state3, err := New(root2, sdb)
	if err != nil {
		t.Fatalf("init state block 3: %v", err)
	}
	state3.SetBalance(addr, uint256.NewInt(200), tracing.BalanceChangeUnspecified)
	root3, err := state3.Commit(2, true, false)
	if err != nil {
		t.Fatalf("commit block 3: %v", err)
	}

	// Read at each root and verify the expected snapshot.
	for _, tc := range []struct {
		name    string
		root    common.Hash
		nonce   uint64
		balance uint64
	}{
		{"block1", root1, 1, 100},
		{"block2", root2, 2, 100},
		{"block3", root3, 2, 200},
	} {
		reader, err := sdb.StateReader(tc.root)
		if err != nil {
			t.Fatalf("%s StateReader: %v", tc.name, err)
		}
		got, err := reader.Account(addr)
		if err != nil {
			t.Fatalf("%s Account: %v", tc.name, err)
		}
		if got == nil {
			t.Fatalf("%s: account is nil", tc.name)
		}
		if got.Nonce != tc.nonce {
			t.Errorf("%s nonce: got %d, want %d", tc.name, got.Nonce, tc.nonce)
		}
		if got.Balance.Uint64() != tc.balance {
			t.Errorf("%s balance: got %d, want %d", tc.name, got.Balance.Uint64(), tc.balance)
		}
	}
}

79
core/state/state_mut.go Normal file
View file

@ -0,0 +1,79 @@
// Copyright 2026 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package state
import "github.com/ethereum/go-ethereum/common"
// mutationType enumerates the kinds of pending account mutation.
type mutationType int

const (
	// update is the zero value; it is the type assigned by markUpdate.
	update mutationType = iota
	// deletion is the type assigned by markDelete.
	deletion
)

// mutation describes the pending change tracked for a single account.
type mutation struct {
	// typ is the kind of the most recently recorded mutation.
	typ mutationType

	// applied is reset to false whenever the mutation is (re-)marked.
	// NOTE(review): presumably set to true elsewhere once the mutation
	// has been pushed to the hasher — confirm against the caller.
	applied bool

	// precedingDelete is set when an update replaces a deletion that had
	// not been applied yet, i.e. the account was deleted and then
	// re-created within the same block. IntermediateRoot is expected to
	// use it to report the deletion to the hasher ahead of the update, so
	// any cached storage trie is dropped and the re-created account
	// starts from a fresh trie.
	precedingDelete bool
}

// copy returns a newly allocated mutation holding the same field values as
// the receiver.
func (m *mutation) copy() *mutation {
	dup := *m
	return &dup
}

// isDelete reports whether the mutation is a deletion.
func (m *mutation) isDelete() bool {
	return m.typ == deletion
}
// markDelete records a pending deletion for addr. It is invoked when an
// account is deleted but the deletion has not been committed yet; the
// pending mutation is cached so that all mutations can be applied together
// later.
//
// If no mutation is tracked for addr yet, a new entry is created. In either
// case the entry ends up as an unapplied deletion with precedingDelete
// cleared.
func (s *StateDB) markDelete(addr common.Address) {
	// Resolve the entry once instead of re-indexing the map for every
	// field write.
	m, ok := s.mutations[addr]
	if !ok {
		m = &mutation{}
		s.mutations[addr] = m
	}
	m.applied = false
	m.typ = deletion
	m.precedingDelete = false
}
// markUpdate records a pending update for addr, creating the tracking entry
// if none exists yet. The entry ends up as an unapplied update.
//
// When the update replaces a deletion that has not been applied yet, the
// entry's precedingDelete flag is raised so that IntermediateRoot can report
// the deletion to the hasher before the update. The flag is never lowered
// here: once a delete-then-update sequence has set it, later markUpdate
// calls must leave it in place.
func (s *StateDB) markUpdate(addr common.Address) {
	pending, found := s.mutations[addr]
	if !found {
		pending = new(mutation)
		s.mutations[addr] = pending
	}
	// Only an outstanding (unapplied) deletion counts as "preceding".
	if pending.typ == deletion && !pending.applied {
		pending.precedingDelete = true
	}
	pending.typ, pending.applied = update, false
}

View file

@ -27,11 +27,6 @@ import (
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/trie"
"github.com/ethereum/go-ethereum/trie/bintrie"
"github.com/ethereum/go-ethereum/trie/transitiontrie"
"github.com/ethereum/go-ethereum/trie/trienode"
"github.com/holiman/uint256"
)
@ -49,13 +44,12 @@ func (s Storage) Copy() Storage {
// - Finally, call commit to return the changes of storage trie and update account data.
type stateObject struct {
db *StateDB
address common.Address // address of ethereum account
addressHash *common.Hash // hash of ethereum address of the account
origin *types.StateAccount // Account original data without any change applied, nil means it was not existent
data types.StateAccount // Account data with all mutations applied in the scope of block
address common.Address // address of ethereum account
addressHash *common.Hash // hash of ethereum address of the account
origin *Account // Account original data without any change applied, nil means it was not existent
data Account // Account data with all mutations applied in the scope of block
// Write caches.
trie Trie // storage trie, which becomes non-nil on first access
code []byte // contract bytecode, which gets set when code is loaded
originStorage Storage // Storage entries that have been accessed within the current block
@ -94,10 +88,10 @@ func (s *stateObject) empty() bool {
}
// newObject creates a state object.
func newObject(db *StateDB, address common.Address, acct *types.StateAccount) *stateObject {
func newObject(db *StateDB, address common.Address, acct *Account) *stateObject {
origin := acct
if acct == nil {
acct = types.NewEmptyStateAccount()
acct = newEmptyAccount()
}
return &stateObject{
db: db,
@ -127,40 +121,6 @@ func (s *stateObject) touch() {
s.db.journal.touchChange(s.address)
}
// getTrie returns the associated storage trie. The trie will be opened if it's
// not loaded previously. An error will be returned if trie can't be loaded.
//
// If a new trie is opened, it will be cached within the state object to allow
// subsequent reads to expand the same trie instead of reloading from disk.
func (s *stateObject) getTrie() (Trie, error) {
if s.trie == nil {
// Assumes the primary account trie is already loaded
tr, err := s.db.db.OpenStorageTrie(s.db.originalRoot, s.address, s.data.Root, s.db.trie)
if err != nil {
return nil, err
}
s.trie = tr
}
return s.trie, nil
}
// getPrefetchedTrie returns the associated trie, as populated by the prefetcher
// if it's available.
//
// Note, opposed to getTrie, this method will *NOT* blindly cache the resulting
// trie in the state object. The caller might want to do that, but it's cleaner
// to break the hidden interdependency between retrieving tries from the db or
// from the prefetcher.
func (s *stateObject) getPrefetchedTrie() Trie {
// If there's nothing to meaningfully return, let the user figure it out by
// pulling the trie from disk.
if (s.data.Root == types.EmptyRootHash && !s.db.db.TrieDB().IsVerkle()) || s.db.prefetcher == nil {
return nil
}
// Attempt to retrieve the trie from the prefetcher
return s.db.prefetcher.trie(s.addrHash(), s.data.Root)
}
// GetState retrieves a value associated with the given storage key.
func (s *stateObject) GetState(key common.Hash) common.Hash {
value, _ := s.getState(key)
@ -211,23 +171,22 @@ func (s *stateObject) GetCommittedState(key common.Hash) common.Hash {
s.originStorage[key] = common.Hash{} // track the empty slot as origin value
return common.Hash{}
}
s.db.StorageLoaded++
start := time.Now()
value, err := s.db.reader.Storage(s.address, key)
if err != nil {
s.db.setError(err)
return common.Hash{}
}
s.db.StorageLoaded++
s.db.StorageReads += time.Since(start)
// Schedule the resolved storage slots for prefetching if it's enabled.
if s.db.prefetcher != nil && s.data.Root != types.EmptyRootHash {
if err = s.db.prefetcher.prefetch(s.addrHash(), s.origin.Root, s.address, nil, []common.Hash{key}, true); err != nil {
log.Error("Failed to prefetch storage slot", "addr", s.address, "key", key, "err", err)
}
}
s.originStorage[key] = value
// Schedule the resolved storage slots for prefetching if it's enabled.
prefetch, ok := s.db.hasher.(Prefetcher)
if ok {
prefetch.PrefetchStorage(s.address, []common.Hash{key}, true)
}
return value
}
@ -273,7 +232,7 @@ func (s *stateObject) finalise() {
// The slot is different from its original value and hasn't been
// tracked for commit yet.
s.uncommittedStorage[key] = s.GetCommittedState(key)
slotsToPrefetch = append(slotsToPrefetch, key) // Copy needed for closure
slotsToPrefetch = append(slotsToPrefetch, key)
}
// Aggregate the dirty storage slots into the pending area. It might
// be possible that the value of tracked slot here is same with the
@ -283,11 +242,6 @@ func (s *stateObject) finalise() {
// byzantium fork) and entry is necessary to modify the value back.
s.pendingStorage[key] = value
}
if s.db.prefetcher != nil && len(slotsToPrefetch) > 0 && s.data.Root != types.EmptyRootHash {
if err := s.db.prefetcher.prefetch(s.addrHash(), s.data.Root, s.address, nil, slotsToPrefetch, false); err != nil {
log.Error("Failed to prefetch slots", "addr", s.address, "slots", len(slotsToPrefetch), "err", err)
}
}
if len(s.dirtyStorage) > 0 {
s.dirtyStorage = make(Storage)
}
@ -295,58 +249,27 @@ func (s *stateObject) finalise() {
// of the newly-created object as it's no longer eligible for self-destruct
// by EIP-6780. For non-newly-created objects, it's a no-op.
s.newContract = false
// Schedule the resolved storage slots for prefetching if it's enabled.
prefetch, ok := s.db.hasher.(Prefetcher)
if ok {
prefetch.PrefetchStorage(s.address, slotsToPrefetch, false)
}
}
// updateTrie is responsible for persisting cached storage changes into the
// object's storage trie. In case the storage trie is not yet loaded, this
// function will load the trie automatically. If any issues arise during the
// loading or updating of the trie, an error will be returned. Furthermore,
// this function will return the mutated storage trie, or nil if there is no
// storage change at all.
//
// It assumes all the dirty storage slots have been finalized before.
func (s *stateObject) updateTrie() (Trie, error) {
// Short circuit if nothing was accessed, don't trigger a prefetcher warning
// state hasher. It assumes all the dirty storage slots have been finalized
// before.
func (s *stateObject) updateTrie() error {
// Short circuit if nothing was accessed
if len(s.uncommittedStorage) == 0 {
// Nothing was written, so we could stop early. Unless we have both reads
// and witness collection enabled, in which case we need to fetch the trie.
if s.db.witness == nil || len(s.originStorage) == 0 {
return s.trie, nil
}
return nil
}
// Retrieve a pretecher populated trie, or fall back to the database. This will
// block until all prefetch tasks are done, which are needed for witnesses even
// for unmodified state objects.
tr := s.getPrefetchedTrie()
if tr != nil {
// Prefetcher returned a live trie, swap it out for the current one
s.trie = tr
} else {
// Fetcher not running or empty trie, fallback to the database trie
var err error
tr, err = s.getTrie()
if err != nil {
s.db.setError(err)
return nil, err
}
}
// Short circuit if nothing changed, don't bother with hashing anything
if len(s.uncommittedStorage) == 0 {
return s.trie, nil
}
// Perform trie updates before deletions. This prevents resolution of unnecessary trie nodes
// in circumstances similar to the following:
//
// Consider nodes `A` and `B` who share the same full node parent `P` and have no other siblings.
// During the execution of a block:
// - `A` is deleted,
// - `C` is created, and also shares the parent `P`.
// If the deletion is handled first, then `P` would be left with only one child, thus collapsed
// into a shortnode. This requires `B` to be resolved from disk.
// Whereas if the created node is handled first, then the collapse is avoided, and `B` is not resolved.
var (
deletions []common.Hash
used = make([]common.Hash, 0, len(s.uncommittedStorage))
updates int64
deletes int64
keys = make([]common.Hash, 0, len(s.uncommittedStorage))
vals = make([]common.Hash, 0, len(s.uncommittedStorage))
)
for key, origin := range s.uncommittedStorage {
// Skip noop changes, persist actual changes
@ -359,56 +282,24 @@ func (s *stateObject) updateTrie() (Trie, error) {
log.Error("Storage slot is not found in pending area", "address", s.address, "slot", key)
continue
}
if (value != common.Hash{}) {
if err := tr.UpdateStorage(s.address, key[:], common.TrimLeftZeroes(value[:])); err != nil {
s.db.setError(err)
return nil, err
}
s.db.StorageUpdated.Add(1)
if value == (common.Hash{}) {
deletes += 1
} else {
deletions = append(deletions, key)
updates += 1
}
// Cache the items for preloading
used = append(used, key) // Copy needed for closure
}
for _, key := range deletions {
if err := tr.DeleteStorage(s.address, key[:]); err != nil {
s.db.setError(err)
return nil, err
}
s.db.StorageDeleted.Add(1)
}
if s.db.prefetcher != nil {
s.db.prefetcher.used(s.addrHash(), s.data.Root, nil, used)
keys = append(keys, key)
vals = append(vals, value)
}
s.uncommittedStorage = make(Storage) // empties the commit markers
return tr, nil
}
s.db.StorageUpdated.Add(updates)
s.db.StorageDeleted.Add(deletes)
// updateRoot flushes all cached storage mutations to trie, recalculating the
// new storage trie root.
func (s *stateObject) updateRoot() {
// Flush cached storage mutations into trie, short circuit if any error
// is occurred or there is no change in the trie.
tr, err := s.updateTrie()
if err != nil || tr == nil {
return
}
s.data.Root = tr.Hash()
return s.db.hasher.UpdateStorage(s.address, keys, vals)
}
// commitStorage overwrites the clean storage with the storage changes and
// fulfills the storage diffs into the given accountUpdate struct.
func (s *stateObject) commitStorage(op *accountUpdate) {
var (
encode = func(val common.Hash) []byte {
if val == (common.Hash{}) {
return nil
}
blob, _ := rlp.EncodeToBytes(common.TrimLeftZeroes(val[:]))
return blob
}
)
for key, val := range s.pendingStorage {
// Skip the noop storage changes, it might be possible the value
// of tracked slot is same in originStorage and pendingStorage
@ -419,17 +310,17 @@ func (s *stateObject) commitStorage(op *accountUpdate) {
}
hash := crypto.Keccak256Hash(key[:])
if op.storages == nil {
op.storages = make(map[common.Hash][]byte)
op.storages = make(map[common.Hash]common.Hash)
}
op.storages[hash] = encode(val)
op.storages[hash] = val
if op.storagesOriginByKey == nil {
op.storagesOriginByKey = make(map[common.Hash][]byte)
op.storagesOriginByKey = make(map[common.Hash]common.Hash)
}
if op.storagesOriginByHash == nil {
op.storagesOriginByHash = make(map[common.Hash][]byte)
op.storagesOriginByHash = make(map[common.Hash]common.Hash)
}
origin := encode(s.originStorage[key])
origin := s.originStorage[key]
op.storagesOriginByKey[key] = origin
op.storagesOriginByHash[hash] = origin
@ -444,23 +335,21 @@ func (s *stateObject) commitStorage(op *accountUpdate) {
//
// Note, commit may run concurrently across all the state objects. Do not assume
// thread-safe access to the statedb.
func (s *stateObject) commit() (*accountUpdate, *trienode.NodeSet, error) {
func (s *stateObject) commit() (*accountUpdate, error) {
// commit the account metadata changes
op := &accountUpdate{
address: s.address,
data: types.SlimAccountRLP(s.data),
}
if s.origin != nil {
op.origin = types.SlimAccountRLP(*s.origin)
data: &s.data,
origin: s.origin,
}
// commit the contract code if it's modified
if s.dirtyCode {
s.dirtyCode = false // reset the dirty flag
op.code = &contractCode{
hash: common.BytesToHash(s.CodeHash()),
blob: s.code,
}
s.dirtyCode = false // reset the dirty flag
if s.origin == nil {
op.code.originHash = types.EmptyCodeHash
} else {
@ -469,24 +358,8 @@ func (s *stateObject) commit() (*accountUpdate, *trienode.NodeSet, error) {
}
// Commit storage changes and the associated storage trie
s.commitStorage(op)
if len(op.storages) == 0 {
// nothing changed, don't bother to commit the trie
s.origin = s.data.Copy()
return op, nil, nil
}
// In Verkle/binary trie mode, all state objects share one unified trie.
// The main account trie commit in stateDB.commit() already calls
// CollectNodes on this trie, so calling Commit here again would
// redundantly traverse and serialize the entire tree per dirty account.
if s.db.GetTrie().IsVerkle() {
s.origin = s.data.Copy()
return op, nil, nil
}
// The storage trie root is omitted, as it has already been updated in the
// previous updateRoot step.
_, nodes := s.trie.Commit(false)
s.origin = s.data.Copy()
return op, nodes, nil
s.origin = s.data.copy()
return op, nil
}
// AddBalance adds amount to s's balance.
@ -532,21 +405,6 @@ func (s *stateObject) deepCopy(db *StateDB) *stateObject {
selfDestructed: s.selfDestructed,
newContract: s.newContract,
}
switch s.trie.(type) {
case *bintrie.BinaryTrie:
// UBT uses only one tree, and the copy has already been
// made in mustCopyTrie.
obj.trie = db.trie
case *transitiontrie.TransitionTrie:
// Same thing for the transition tree, since the MPT is
// read-only.
obj.trie = db.trie
case *trie.StateTrie:
obj.trie = mustCopyTrie(s.trie)
case nil:
// do nothing
}
return obj
}
@ -636,7 +494,3 @@ func (s *stateObject) Balance() *uint256.Int {
// Nonce returns the nonce held in the account data of this state object.
func (s *stateObject) Nonce() uint64 {
return s.data.Nonce
}
// Root returns the storage root recorded in the account data of this state
// object.
func (s *stateObject) Root() common.Hash {
return s.data.Root
}

View file

@ -41,6 +41,7 @@ const (
)
// Database key scheme for states.
// nolint:unused
var (
accountKeySize = int64(len(rawdb.SnapshotAccountPrefix) + common.HashLength)
storageKeySize = int64(len(rawdb.SnapshotStoragePrefix) + common.HashLength*2)
@ -130,11 +131,15 @@ func calSizeStats(update *stateUpdate) (SizeStats, error) {
BlockNumber: update.blockNumber,
StateRoot: update.root,
}
accounts, accountOrigin, storages, storageOrigin, err := update.encodeMerkle()
if err != nil {
return SizeStats{}, err
}
// Measure the account changes
for addr, oldValue := range update.accountsOrigin {
for addr, oldValue := range accountOrigin {
addrHash := crypto.Keccak256Hash(addr.Bytes())
newValue, exists := update.accounts[addrHash]
newValue, exists := accounts[addrHash]
if !exists {
return SizeStats{}, fmt.Errorf("account %x not found", addr)
}
@ -156,9 +161,9 @@ func calSizeStats(update *stateUpdate) (SizeStats, error) {
}
// Measure storage changes
for addr, slots := range update.storagesOrigin {
for addr, slots := range storageOrigin {
addrHash := crypto.Keccak256Hash(addr.Bytes())
subset, exists := update.storages[addrHash]
subset, exists := storages[addrHash]
if !exists {
return SizeStats{}, fmt.Errorf("storage %x not found", addr)
}

View file

@ -23,8 +23,6 @@ import (
"maps"
"slices"
"sort"
"sync"
"sync/atomic"
"time"
"github.com/ethereum/go-ethereum/common"
@ -32,8 +30,8 @@ import (
"github.com/ethereum/go-ethereum/core/tracing"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/params"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/trie"
"github.com/ethereum/go-ethereum/trie/trienode"
"github.com/holiman/uint256"
@ -43,26 +41,6 @@ import (
// TriesInMemory represents the number of layers that are kept in RAM.
const TriesInMemory = 128
// mutationType distinguishes the kinds of pending account mutation.
type mutationType int

const (
	// update marks an account whose data has been modified.
	update mutationType = iota
	// deletion marks an account that has been removed.
	deletion
)

// mutation describes a pending change to a single account.
type mutation struct {
	typ     mutationType // kind of change (update or deletion)
	applied bool         // set once the change has been flushed
}

// copy returns an independent duplicate of the mutation.
func (m *mutation) copy() *mutation {
	dup := new(mutation)
	dup.typ = m.typ
	dup.applied = m.applied
	return dup
}

// isDelete reports whether the mutation is an account deletion.
func (m *mutation) isDelete() bool {
	return deletion == m.typ
}
// StateDB structs within the ethereum protocol are used to store anything
// within the merkle trie. StateDBs take care of caching and storing
// nested states. It's the general query interface to retrieve:
@ -75,10 +53,9 @@ func (m *mutation) isDelete() bool {
// must be created with new root and updated database for accessing post-
// commit states.
type StateDB struct {
db Database
prefetcher *triePrefetcher
reader Reader
trie Trie // it's resolved on first access
db Database
reader Reader
hasher Hasher
// originalRoot is the pre-state root, before any changes were made.
// It will be updated when the Commit is called.
@ -137,32 +114,7 @@ type StateDB struct {
witness *stateless.Witness
// Measurements gathered during execution for debugging purposes
AccountReads time.Duration
AccountHashes time.Duration
AccountUpdates time.Duration
AccountCommits time.Duration
StorageReads time.Duration
StorageUpdates time.Duration
StorageCommits time.Duration
DatabaseCommits time.Duration
CodeReads time.Duration
AccountLoaded int // Number of accounts retrieved from the database during the state transition
AccountUpdated int // Number of accounts updated during the state transition
AccountDeleted int // Number of accounts deleted during the state transition
StorageLoaded int // Number of storage slots retrieved from the database during the state transition
StorageUpdated atomic.Int64 // Number of storage slots updated during the state transition
StorageDeleted atomic.Int64 // Number of storage slots deleted during the state transition
// CodeLoadBytes is the total number of bytes read from contract code.
// This value may be smaller than the actual number of bytes read, since
// some APIs (e.g. CodeSize) may load the entire code from either the
// cache or the database when the size is not available in the cache.
CodeLoaded int // Number of contract code loaded during the state transition
CodeLoadBytes int // Total bytes of resolved code
CodeUpdated int // Number of contracts with code changes that persisted
CodeUpdateBytes int // Total bytes of persisted code written
Stats
}
// New creates a new state from a given trie.
@ -177,10 +129,15 @@ func New(root common.Hash, db Database) (*StateDB, error) {
// NewWithReader creates a new state for the specified state root. Unlike New,
// this function accepts an additional Reader which is bound to the given root.
func NewWithReader(root common.Hash, db Database, reader Reader) (*StateDB, error) {
hasher, err := db.Hasher(root)
if err != nil {
return nil, err
}
sdb := &StateDB{
db: db,
originalRoot: root,
reader: reader,
hasher: hasher,
stateObjects: make(map[common.Address]*stateObject),
stateObjectsDestruct: make(map[common.Address]*stateObject),
mutations: make(map[common.Address]*mutation),
@ -196,39 +153,9 @@ func NewWithReader(root common.Hash, db Database, reader Reader) (*StateDB, erro
return sdb, nil
}
// StartPrefetcher initializes a new trie prefetcher to pull in nodes from the
// state trie concurrently while the state is mutated so that when we reach the
// commit phase, most of the needed data is already hot.
func (s *StateDB) StartPrefetcher(namespace string, witness *stateless.Witness) {
// Terminate any previously running prefetcher
s.StopPrefetcher()
// Enable witness collection if requested
// TraceWitness enables execution witness gathering.
func (s *StateDB) TraceWitness(witness *stateless.Witness) {
s.witness = witness
// With the switch to the Proof-of-Stake consensus algorithm, block production
// rewards are now handled at the consensus layer. Consequently, a block may
// have no state transitions if it contains no transactions and no withdrawals.
// In such cases, the account trie won't be scheduled for prefetching, leading
// to unnecessary error logs.
//
// To prevent this, the account trie is always scheduled for prefetching once
// the prefetcher is constructed. For more details, see:
// https://github.com/ethereum/go-ethereum/issues/29880
s.prefetcher = newTriePrefetcher(s.db, s.originalRoot, namespace, witness == nil)
if err := s.prefetcher.prefetch(common.Hash{}, s.originalRoot, common.Address{}, nil, nil, false); err != nil {
log.Error("Failed to prefetch account trie", "root", s.originalRoot, "err", err)
}
}
// StopPrefetcher shuts down the active prefetcher, if there is one, reports
// its gathered metrics and detaches it from the state. It is a no-op when no
// prefetcher is running.
func (s *StateDB) StopPrefetcher() {
	active := s.prefetcher
	if active == nil {
		return
	}
	active.terminate(false)
	active.report()
	s.prefetcher = nil
}
// setError remembers the first non-nil error it is called with.
@ -254,7 +181,7 @@ func (s *StateDB) AddLog(log *types.Log) {
}
// GetLogs returns the logs matching the specified transaction hash, and annotates
// them with the given blockNumber and blockHash.
// them with the given block attributes.
func (s *StateDB) GetLogs(hash common.Hash, blockNumber uint64, blockHash common.Hash, blockTime uint64) []*types.Log {
logs := s.logs[hash]
for _, l := range logs {
@ -265,6 +192,7 @@ func (s *StateDB) GetLogs(hash common.Hash, blockNumber uint64, blockHash common
return logs
}
// Logs returns the un-annotated logs in order.
func (s *StateDB) Logs() []*types.Log {
logs := make([]*types.Log, 0, s.logSize)
for _, lgs := range s.logs {
@ -336,19 +264,6 @@ func (s *StateDB) GetNonce(addr common.Address) uint64 {
return 0
}
// GetStorageRoot returns the storage root of the account at the given
// address, or the zero hash when no state object exists for it.
//
// Note: the returned root reflects the trie as of the last Intermediate
// operation; more recent in-memory changes are not included.
func (s *StateDB) GetStorageRoot(addr common.Address) common.Hash {
	if obj := s.getStateObject(addr); obj != nil {
		return obj.Root()
	}
	return common.Hash{}
}
// TxIndex returns the current transaction index set by SetTxContext.
func (s *StateDB) TxIndex() int {
return s.txIndex
@ -558,24 +473,6 @@ func (s *StateDB) GetTransientState(addr common.Address, key common.Hash) common
// Setting, updating & deleting state object methods.
//
// updateStateObject writes the given object to the trie.
//
// The account data is written first; if the object's code is dirty, the
// contract code is handed to the trie as well. Neither failure aborts the
// call: each is reported through setError, which keeps the first non-nil
// error it is given.
func (s *StateDB) updateStateObject(obj *stateObject) {
	addr := obj.Address()

	// Encode the account and update the account trie
	if err := s.trie.UpdateAccount(addr, &obj.data, len(obj.code)); err != nil {
		s.setError(fmt.Errorf("updateStateObject (%x) error: %w", addr, err))
	}
	if !obj.dirtyCode {
		return
	}
	// Record a failed code update instead of silently dropping it.
	if err := s.trie.UpdateContractCode(addr, common.BytesToHash(obj.CodeHash()), obj.code); err != nil {
		s.setError(fmt.Errorf("updateStateObject (%x) code error: %w", addr, err))
	}
}
// deleteStateObject drops the account at the given address from the state
// trie. A failure is reported through setError rather than returned.
func (s *StateDB) deleteStateObject(addr common.Address) {
	err := s.trie.DeleteAccount(addr)
	if err == nil {
		return
	}
	s.setError(fmt.Errorf("deleteStateObject (%x) error: %v", addr[:], err))
}
// getStateObject retrieves a state object given by the address, returning nil if
// the object is not found or was deleted in this execution context.
func (s *StateDB) getStateObject(addr common.Address) *stateObject {
@ -587,29 +484,28 @@ func (s *StateDB) getStateObject(addr common.Address) *stateObject {
if _, ok := s.stateObjectsDestruct[addr]; ok {
return nil
}
s.AccountLoaded++
start := time.Now()
acct, err := s.reader.Account(addr)
if err != nil {
s.setError(fmt.Errorf("getStateObject (%x) error: %w", addr.Bytes(), err))
return nil
}
s.AccountLoaded++
s.AccountReads += time.Since(start)
// Short circuit if the account is not found
if acct == nil {
return nil
}
// Schedule the resolved account for prefetching if it's enabled.
if s.prefetcher != nil {
if err = s.prefetcher.prefetch(common.Hash{}, s.originalRoot, common.Address{}, []common.Address{addr}, nil, true); err != nil {
log.Error("Failed to prefetch account", "addr", addr, "err", err)
}
}
// Insert into the live set
obj := newObject(s, addr, acct)
s.setStateObject(obj)
// Schedule the resolved account for prefetching if it's enabled.
prefetcher, ok := s.hasher.(Prefetcher)
if ok {
prefetcher.PrefetchAccount([]common.Address{addr}, true)
}
return obj
}
@ -673,6 +569,7 @@ func (s *StateDB) Copy() *StateDB {
state := &StateDB{
db: s.db,
reader: s.reader,
hasher: s.hasher.Copy(),
originalRoot: s.originalRoot,
stateObjects: make(map[common.Address]*stateObject, len(s.stateObjects)),
stateObjectsDestruct: make(map[common.Address]*stateObject, len(s.stateObjectsDestruct)),
@ -695,9 +592,6 @@ func (s *StateDB) Copy() *StateDB {
transientStorage: s.transientStorage.Copy(),
journal: s.journal.copy(),
}
if s.trie != nil {
state.trie = mustCopyTrie(s.trie)
}
if s.witness != nil {
state.witness = s.witness.Copy()
}
@ -810,18 +704,18 @@ func (s *StateDB) Finalise(deleteEmptyObjects bool) {
obj.finalise()
s.markUpdate(addr)
}
// At this point, also ship the address off to the precacher. The precacher
// At this point, also ship the address off to the prefetcher. The prefetcher
// will start loading tries, and when the change is eventually committed,
// the commit-phase will be a lot faster
addressesToPrefetch = append(addressesToPrefetch, addr) // Copy needed for closure
}
if s.prefetcher != nil && len(addressesToPrefetch) > 0 {
if err := s.prefetcher.prefetch(common.Hash{}, s.originalRoot, common.Address{}, addressesToPrefetch, nil, false); err != nil {
log.Error("Failed to prefetch addresses", "addresses", len(addressesToPrefetch), "err", err)
}
addressesToPrefetch = append(addressesToPrefetch, addr)
}
// Invalidate journal because reverting across transactions is not allowed.
s.clearJournalAndRefund()
prefetcher, ok := s.hasher.(Prefetcher)
if ok {
prefetcher.PrefetchAccount(addressesToPrefetch, false)
}
}
// IntermediateRoot computes the current root hash of the state trie.
@ -831,204 +725,100 @@ func (s *StateDB) IntermediateRoot(deleteEmptyObjects bool) common.Hash {
// Finalise all the dirty storage states and write them into the tries
s.Finalise(deleteEmptyObjects)
// Initialize the trie if it's not constructed yet. If the prefetch
// is enabled, the trie constructed below will be replaced by the
// prefetched one.
//
// This operation must be done before state object storage hashing,
// as it assumes the main trie is already loaded.
if s.trie == nil {
tr, err := s.db.OpenTrie(s.originalRoot)
if err != nil {
// Pre-process mutations whose preceding deletion has not yet been
// applied. This happens when an account is deleted and then re-created
// within the same block and the deletion was overwritten by the update.
// Notify the hasher of the deletion first so that any cached storage
// trie is evicted and the re-created account starts with a fresh trie.
var (
delAddrs []common.Address
delAccts []AccountMut
start = time.Now()
)
for addr, op := range s.mutations {
if !op.precedingDelete {
continue
}
op.precedingDelete = false
delAddrs = append(delAddrs, addr)
delAccts = append(delAccts, AccountMut{Account: nil})
}
if len(delAddrs) > 0 {
if err := s.hasher.UpdateAccount(delAddrs, delAccts); err != nil {
s.setError(err)
return common.Hash{}
}
s.trie = tr
s.AccountDeleted += len(delAddrs)
}
// If there was a trie prefetcher operating, terminate it async so that the
// individual storage tries can be updated as soon as the disk load finishes.
if s.prefetcher != nil {
s.prefetcher.terminate(true)
defer func() {
s.prefetcher.report()
s.prefetcher = nil // Pre-byzantium, unset any used up prefetcher
}()
}
// Process all storage updates concurrently. The state object update root
// method will internally call a blocking trie fetch from the prefetcher,
// so there's no need to explicitly wait for the prefetchers to finish.
var (
start = time.Now()
workers errgroup.Group
)
if s.db.TrieDB().IsVerkle() {
// Bypass per-account updateTrie() for binary trie. In binary trie mode
// there is only one unified trie (OpenStorageTrie returns self), so the
// per-account trie setup in updateTrie() (getPrefetchedTrie, getTrie,
// prefetcher.used) is redundant overhead. Apply all storage updates
// directly in a single pass.
for addr, op := range s.mutations {
if op.applied || op.isDelete() {
continue
}
obj := s.stateObjects[addr]
if len(obj.uncommittedStorage) == 0 {
continue
}
for key, origin := range obj.uncommittedStorage {
value, exist := obj.pendingStorage[key]
if value == origin || !exist {
continue
}
if (value != common.Hash{}) {
if err := s.trie.UpdateStorage(addr, key[:], common.TrimLeftZeroes(value[:])); err != nil {
s.setError(err)
}
s.StorageUpdated.Add(1)
} else {
if err := s.trie.DeleteStorage(addr, key[:]); err != nil {
s.setError(err)
}
s.StorageDeleted.Add(1)
}
}
}
// Clear uncommittedStorage and assign trie on each touched object.
// obj.trie must be set because this path bypasses updateTrie(), which
// is where obj.trie normally gets lazily loaded via getTrie().
for addr, op := range s.mutations {
if op.applied || op.isDelete() {
continue
}
obj := s.stateObjects[addr]
if len(obj.uncommittedStorage) > 0 {
obj.uncommittedStorage = make(Storage)
}
obj.trie = s.trie
}
} else {
for addr, op := range s.mutations {
if op.applied || op.isDelete() {
continue
}
obj := s.stateObjects[addr] // closure for the task runner below
workers.Go(func() error {
obj.updateRoot()
s.AccountUpdates += time.Since(start)
// If witness building is enabled and the state object has a trie,
// gather the witnesses for its specific storage trie
if s.witness != nil && obj.trie != nil {
s.witness.AddState(obj.trie.Witness(), obj.addrHash())
}
return nil
})
// Process all storage updates concurrently, flushing them to hasher.
start = time.Now()
var workers errgroup.Group
for addr, op := range s.mutations {
if op.applied || op.isDelete() {
continue
}
obj := s.stateObjects[addr]
workers.Go(obj.updateTrie)
}
// If witness building is enabled, gather all the read-only accesses.
// Skip witness collection in Verkle mode, they will be gathered
// together at the end.
if s.witness != nil && !s.db.TrieDB().IsVerkle() {
// Pull in anything that has been accessed before destruction
for _, obj := range s.stateObjectsDestruct {
// Skip any objects that haven't touched their storage
if len(obj.originStorage) == 0 {
continue
}
if trie := obj.getPrefetchedTrie(); trie != nil {
s.witness.AddState(trie.Witness(), obj.addrHash())
} else if obj.trie != nil {
s.witness.AddState(obj.trie.Witness(), obj.addrHash())
}
}
// Pull in only-read and non-destructed trie witnesses
for _, obj := range s.stateObjects {
// Skip any objects that have been updated
if _, ok := s.mutations[obj.address]; ok {
continue
}
// Skip any objects that haven't touched their storage
if len(obj.originStorage) == 0 {
continue
}
if trie := obj.getPrefetchedTrie(); trie != nil {
s.witness.AddState(trie.Witness(), obj.addrHash())
} else if obj.trie != nil {
s.witness.AddState(obj.trie.Witness(), obj.addrHash())
}
}
if err := workers.Wait(); err != nil {
s.setError(err)
}
workers.Wait()
s.StorageUpdates += time.Since(start)
// Now we're about to start to write changes to the trie. The trie is so far
// _untouched_. We can check with the prefetcher, if it can give us a trie
// which has the same root, but also has some content loaded into it.
//
// Don't check prefetcher if verkle trie has been used. In the context of verkle,
// only a single trie is used for state hashing. Replacing a non-nil verkle tree
// here could result in losing uncommitted changes from storage.
start = time.Now()
if s.prefetcher != nil && !s.db.TrieDB().IsVerkle() {
if trie := s.prefetcher.trie(common.Hash{}, s.originalRoot); trie == nil {
log.Error("Failed to retrieve account pre-fetcher trie")
} else {
s.trie = trie
}
}
// Perform updates before deletions. This prevents resolution of unnecessary trie nodes
// in circumstances similar to the following:
//
// Consider nodes `A` and `B` who share the same full node parent `P` and have no other siblings.
// During the execution of a block:
// - `A` self-destructs,
// - `C` is created, and also shares the parent `P`.
// If the self-destruct is handled first, then `P` would be left with only one child, thus collapsed
// into a shortnode. This requires `B` to be resolved from disk.
// Whereas if the created node is handled first, then the collapse is avoided, and `B` is not resolved.
// Process all account updates
var (
usedAddrs []common.Address
deletedAddrs []common.Address
addresses []common.Address
accounts []AccountMut
)
start = time.Now()
for addr, op := range s.mutations {
if op.applied {
continue
}
op.applied = true
addresses = append(addresses, addr)
if op.isDelete() {
deletedAddrs = append(deletedAddrs, addr)
} else {
obj := s.stateObjects[addr]
s.updateStateObject(obj)
s.AccountUpdated += 1
accounts = append(accounts, AccountMut{Account: nil})
s.AccountDeleted += 1
continue
}
obj := s.stateObjects[addr]
// CodeSize must be the account's CURRENT total code size, even for
// non-code-touching mutations. obj.CodeSize() returns len(obj.code)
// when the code is loaded, otherwise falls back to a code-size
// lookup via the reader. Hashers that pack code size into the
// on-trie account encoding (e.g. the binary trie BasicData leaf,
// per EIP-7864) rely on this value. Passing the default 0 here on
// a balance/nonce-only update would silently corrupt the BasicData
// leaf of every contract touched without a code write.
mut := AccountMut{
Account: &obj.data,
CodeSize: obj.CodeSize(),
}
if obj.dirtyCode {
mut.Code = &CodeMut{Code: obj.code}
// Count code writes post-Finalise so reverted CREATEs are excluded.
if obj.dirtyCode {
s.CodeUpdated += 1
s.CodeUpdateBytes += len(obj.code)
}
s.CodeUpdated += 1
s.CodeUpdateBytes += len(obj.code)
}
usedAddrs = append(usedAddrs, addr) // Copy needed for closure
accounts = append(accounts, mut)
s.AccountUpdated += 1
}
for _, deletedAddr := range deletedAddrs {
s.deleteStateObject(deletedAddr)
s.AccountDeleted += 1
if err := s.hasher.UpdateAccount(addresses, accounts); err != nil {
s.setError(err)
return common.Hash{}
}
s.AccountUpdates += time.Since(start)
if s.prefetcher != nil {
s.prefetcher.used(common.Hash{}, s.originalRoot, usedAddrs, nil)
}
// Track the amount of time wasted on hashing the account trie
defer func(start time.Time) { s.AccountHashes += time.Since(start) }(time.Now())
hash := s.trie.Hash()
// If witness building is enabled, gather the account trie witness
if s.witness != nil {
s.witness.AddState(s.trie.Witness(), common.Hash{})
}
return hash
return s.hasher.Hash()
}
// SetTxContext sets the current transaction hash and index which are
@ -1045,11 +835,11 @@ func (s *StateDB) clearJournalAndRefund() {
}
// deleteStorage is designed to delete the storage trie of a designated account.
func (s *StateDB) deleteStorage(addrHash common.Hash, root common.Hash) (map[common.Hash][]byte, map[common.Hash][]byte, *trienode.NodeSet, error) {
func (s *StateDB) deleteStorage(addrHash common.Hash) (map[common.Hash]common.Hash, map[common.Hash]common.Hash, *trienode.NodeSet, error) {
var (
nodes = trienode.NewNodeSet(addrHash) // the set for trie node mutations (value is nil)
storages = make(map[common.Hash][]byte) // the set for storage mutations (value is nil)
storageOrigins = make(map[common.Hash][]byte) // the set for tracking the original value of slot
nodes = trienode.NewNodeSet(addrHash) // the set for trie node mutations (value is nil)
storages = make(map[common.Hash]common.Hash) // the set for storage mutations (value is nil)
storageOrigins = make(map[common.Hash]common.Hash) // the set for tracking the original value of slot
)
iteratee, err := s.db.Iteratee(s.originalRoot)
if err != nil {
@ -1070,8 +860,15 @@ func (s *StateDB) deleteStorage(addrHash common.Hash, root common.Hash) (map[com
return nil, nil, nil, err
}
key := it.Hash()
storages[key] = nil
storageOrigins[key] = slot
storages[key] = common.Hash{}
_, content, _, err := rlp.Split(slot)
if err != nil {
return nil, nil, nil, err
}
var value common.Hash
value.SetBytes(content)
storageOrigins[key] = value
if err := stack.Update(key.Bytes(), slot); err != nil {
return nil, nil, nil, err
@ -1080,9 +877,7 @@ func (s *StateDB) deleteStorage(addrHash common.Hash, root common.Hash) (map[com
if err := it.Error(); err != nil { // error might occur during iteration
return nil, nil, nil, err
}
if stack.Hash() != root {
return nil, nil, nil, fmt.Errorf("snapshot is not matched, exp %x, got %x", root, stack.Hash())
}
stack.Hash() // Commit the right boundary
return storages, storageOrigins, nodes, nil
}
@ -1104,9 +899,9 @@ func (s *StateDB) deleteStorage(addrHash common.Hash, root common.Hash) (map[com
// with their values tracked as the original values.
// In case (d), the **original** account along with its storages should be deleted,
// with their values tracked as the original values.
func (s *StateDB) handleDestruction(noStorageWiping bool) (map[common.Hash]*accountDelete, []*trienode.NodeSet, error) {
func (s *StateDB) handleDestruction(noStorageWiping bool) (map[common.Hash]*accountDelete, *trienode.MergedNodeSet, error) {
var (
nodes []*trienode.NodeSet
nodes = trienode.NewMergedNodeSet()
deletes = make(map[common.Hash]*accountDelete)
)
for addr, prevObj := range s.stateObjectsDestruct {
@ -1124,36 +919,32 @@ func (s *StateDB) handleDestruction(noStorageWiping bool) (map[common.Hash]*acco
addrHash := crypto.Keccak256Hash(addr.Bytes())
op := &accountDelete{
address: addr,
origin: types.SlimAccountRLP(*prev),
origin: *prev,
}
deletes[addrHash] = op
// Short circuit if the origin storage was empty.
if prev.Root == types.EmptyRootHash || s.db.TrieDB().IsVerkle() {
if s.db.TrieDB().IsVerkle() {
continue
}
if noStorageWiping {
return nil, nil, fmt.Errorf("unexpected storage wiping, %x", addr)
}
// Remove storage slots belonging to the account.
storages, storagesOrigin, set, err := s.deleteStorage(addrHash, prev.Root)
storages, storagesOrigin, set, err := s.deleteStorage(addrHash)
if err != nil {
return nil, nil, fmt.Errorf("failed to delete storage, err: %w", err)
}
op.storages = storages
op.storagesOrigin = storagesOrigin
op.storages, op.storagesOrigin = storages, storagesOrigin
// Aggregate the associated trie node changes.
nodes = append(nodes, set)
if err := nodes.Merge(set); err != nil {
return nil, nil, err
}
}
return deletes, nodes, nil
}
// GetTrie returns the account trie. The trie is resolved lazily on first
// access, so the returned value may be nil if it has not been opened yet.
func (s *StateDB) GetTrie() Trie {
return s.trie
}
// commit gathers the state mutations accumulated along with the associated
// trie changes, resetting all internal flags with the new state as the base.
func (s *StateDB) commit(deleteEmptyObjects bool, noStorageWiping bool, blockNumber uint64) (*stateUpdate, error) {
@ -1168,89 +959,16 @@ func (s *StateDB) commit(deleteEmptyObjects bool, noStorageWiping bool, blockNum
if s.dbErr != nil {
return nil, fmt.Errorf("commit aborted due to database error: %v", s.dbErr)
}
// Commit objects to the trie, measuring the elapsed time
var (
accountTrieNodesUpdated int
accountTrieNodesDeleted int
storageTrieNodesUpdated int
storageTrieNodesDeleted int
lock sync.Mutex // protect two maps below
nodes = trienode.NewMergedNodeSet() // aggregated trie nodes
updates = make(map[common.Hash]*accountUpdate, len(s.mutations)) // aggregated account updates
// merge aggregates the dirty trie nodes into the global set.
//
// Given that some accounts may be destroyed and then recreated within
// the same block, it's possible that a node set with the same owner
// may already exist. In such cases, these two sets are combined, with
// the later one overwriting the previous one if any nodes are modified
// or deleted in both sets.
//
// merge run concurrently across all the state objects and account trie.
merge = func(set *trienode.NodeSet) error {
if set == nil {
return nil
}
lock.Lock()
defer lock.Unlock()
updates, deletes := set.Size()
if set.Owner == (common.Hash{}) {
accountTrieNodesUpdated += updates
accountTrieNodesDeleted += deletes
} else {
storageTrieNodesUpdated += updates
storageTrieNodesDeleted += deletes
}
return nodes.Merge(set)
}
)
// Given that some accounts could be destroyed and then recreated within
// the same block, account deletions must be processed first. This ensures
// that the storage trie nodes deleted during destruction and recreated
// during subsequent resurrection can be combined correctly.
deletes, delNodes, err := s.handleDestruction(noStorageWiping)
deletes, nodes, err := s.handleDestruction(noStorageWiping)
if err != nil {
return nil, err
}
for _, set := range delNodes {
if err := merge(set); err != nil {
return nil, err
}
}
// Handle all state updates afterwards, concurrently to one another to shave
// off some milliseconds from the commit operation. Also accumulate the code
// writes to run in parallel with the computations.
var (
start = time.Now()
workers errgroup.Group
)
// Schedule the account trie first since that will be the biggest, so give
// it the most time to crunch.
//
// TODO(karalabe): This account trie commit is *very* heavy. 5-6ms at chain
// heads, which seems excessive given that it doesn't do hashing, it just
// shuffles some data. For comparison, the *hashing* at chain head is 2-3ms.
// We need to investigate what's happening as it seems something's wonky.
// Obviously it's not an end of the world issue, just something the original
// code didn't anticipate for.
workers.Go(func() error {
// Write the account trie changes, measuring the amount of wasted time
_, set := s.trie.Commit(true)
if err := merge(set); err != nil {
return err
}
s.AccountCommits = time.Since(start)
return nil
})
// Schedule each of the storage tries that need to be updated, so they can
// run concurrently to one another.
//
// TODO(karalabe): Experimentally, the account commit takes approximately the
// same time as all the storage commits combined, so we could maybe only have
// 2 threads in total. But that kind of depends on the account commit being
// more expensive than it should be, so let's fix that and revisit this todo.
// Aggregated account updates
updates := make(map[common.Hash]*accountUpdate, len(s.mutations))
for addr, op := range s.mutations {
if op.isDelete() {
continue
@ -1260,44 +978,25 @@ func (s *StateDB) commit(deleteEmptyObjects bool, noStorageWiping bool, blockNum
if obj == nil {
return nil, errors.New("missing state object")
}
// Run the storage updates concurrently to one another
workers.Go(func() error {
// Write any storage changes in the state object to its storage trie
update, set, err := obj.commit()
if err != nil {
return err
}
if err := merge(set); err != nil {
return err
}
lock.Lock()
updates[obj.addrHash()] = update
s.StorageCommits = time.Since(start) // overwrite with the longest storage commit runtime
lock.Unlock()
return nil
})
update, err := obj.commit()
if err != nil {
return nil, err
}
updates[obj.addrHash()] = update
}
// Wait for everything to finish and update the metrics
if err := workers.Wait(); err != nil {
// Handle all state updates afterwards, concurrently to one another to shave
// off some milliseconds from the commit operation. Also accumulate the code
// writes to run in parallel with the computations.
start := time.Now()
root, set, secondaryHashes, err := s.hasher.Commit()
if err != nil {
return nil, err
}
accountReadMeters.Mark(int64(s.AccountLoaded))
storageReadMeters.Mark(int64(s.StorageLoaded))
accountUpdatedMeter.Mark(int64(s.AccountUpdated))
storageUpdatedMeter.Mark(s.StorageUpdated.Load())
accountDeletedMeter.Mark(int64(s.AccountDeleted))
storageDeletedMeter.Mark(s.StorageDeleted.Load())
accountTrieUpdatedMeter.Mark(int64(accountTrieNodesUpdated))
accountTrieDeletedMeter.Mark(int64(accountTrieNodesDeleted))
storageTriesUpdatedMeter.Mark(int64(storageTrieNodesUpdated))
storageTriesDeletedMeter.Mark(int64(storageTrieNodesDeleted))
// Clear the metric markers
s.AccountLoaded, s.AccountUpdated, s.AccountDeleted = 0, 0, 0
s.StorageLoaded = 0
s.StorageUpdated.Store(0)
s.StorageDeleted.Store(0)
s.HasherCommits = time.Since(start)
if err := nodes.MergeSet(set); err != nil {
return nil, err
}
// Clear all internal flags and update state root at the end.
s.mutations = make(map[common.Address]*mutation)
s.stateObjectsDestruct = make(map[common.Address]*stateObject)
@ -1305,7 +1004,22 @@ func (s *StateDB) commit(deleteEmptyObjects bool, noStorageWiping bool, blockNum
origin := s.originalRoot
s.originalRoot = root
return newStateUpdate(noStorageWiping, origin, root, blockNumber, deletes, updates, nodes), nil
if s.witness != nil {
builder, ok := s.hasher.(WitnessCollector)
if ok {
builder.CollectWitness(s.witness)
}
}
// If the hasher tracks flat-state leaf production (currently only the
// binary hasher), drain the buffered stem writes so the downstream
// state update can carry them into the pathdb flat-state layer. Merkle
// hashers do not implement this interface and the call short-circuits
// to nil — newStateUpdate accepts nil as "no leaves".
var leaves []StemWrite
if producer, ok := s.hasher.(LeafProducer); ok {
leaves = producer.DrainStemWrites()
}
return newStateUpdate(noStorageWiping, origin, root, blockNumber, deletes, updates, nodes, secondaryHashes, leaves), nil
}
// commitAndFlush is a wrapper of commit which also commits the state mutations
@ -1326,10 +1040,19 @@ func (s *StateDB) commitAndFlush(block uint64, deleteEmptyObjects bool, noStorag
}
s.DatabaseCommits = time.Since(start)
// The reader update must be performed as the final step, otherwise,
// the new state would not be visible before db.commit.
s.reader, _ = s.db.Reader(s.originalRoot)
return ret, err
reader, err := s.db.Reader(s.originalRoot)
if err != nil {
return nil, err
}
s.reader = reader
hasher, err := s.db.Hasher(s.originalRoot)
if err != nil {
return nil, err
}
s.hasher = hasher
return ret, nil
}
// Commit writes the state mutations into the configured data stores.
@ -1440,25 +1163,6 @@ func (s *StateDB) SlotInAccessList(addr common.Address, slot common.Hash) (addre
return s.accessList.Contains(addr, slot)
}
// markDelete records a pending deletion for the given account. The mutation
// entry is created on first use and is flagged as not yet applied, so that
// it is picked up when the cached mutations are processed together.
func (s *StateDB) markDelete(addr common.Address) {
	pending, found := s.mutations[addr]
	if !found {
		pending = new(mutation)
		s.mutations[addr] = pending
	}
	pending.applied, pending.typ = false, deletion
}
// markUpdate records a pending update for the given account. The mutation
// entry is created on first use and is flagged as not yet applied.
func (s *StateDB) markUpdate(addr common.Address) {
	pending, found := s.mutations[addr]
	if !found {
		pending = new(mutation)
		s.mutations[addr] = pending
	}
	pending.applied, pending.typ = false, update
}
// Witness retrieves the current state witness being collected.
func (s *StateDB) Witness() *stateless.Witness {
return s.witness
@ -1467,3 +1171,15 @@ func (s *StateDB) Witness() *stateless.Witness {
func (s *StateDB) AccessEvents() *AccessEvents {
return s.accessEvents
}
// StopPrefetcher asks the configured hasher to terminate its background
// prefetching, provided the hasher implements Prefetcher. It is a no-op
// when no hasher is set or the hasher has no prefetching capability.
func (s *StateDB) StopPrefetcher() {
	// A comma-ok assertion on a nil interface simply reports false, so the
	// unset-hasher case needs no separate guard.
	if p, ok := s.hasher.(Prefetcher); ok {
		p.TermPrefetch()
	}
}

View file

@ -183,10 +183,11 @@ func (test *stateTest) run() bool {
storages []map[common.Hash]map[common.Hash][]byte
storageOrigin []map[common.Address]map[common.Hash][]byte
copyUpdate = func(update *stateUpdate) {
accounts = append(accounts, maps.Clone(update.accounts))
accountOrigin = append(accountOrigin, maps.Clone(update.accountsOrigin))
storages = append(storages, maps.Clone(update.storages))
storageOrigin = append(storageOrigin, maps.Clone(update.storagesOrigin))
encoded, _ := update.stateSet(true)
accounts = append(accounts, maps.Clone(encoded.Accounts))
accountOrigin = append(accountOrigin, maps.Clone(encoded.AccountsOrigin))
storages = append(storages, maps.Clone(encoded.Storages))
storageOrigin = append(storageOrigin, maps.Clone(encoded.StoragesOrigin))
}
disk = rawdb.NewMemoryDatabase()
tdb = triedb.NewDatabase(disk, &triedb.Config{PathDB: pathdb.Defaults})

View file

@ -0,0 +1,61 @@
// Copyright 2026 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package state
import (
"sync/atomic"
"time"
)
// Stats bundles the timing and counter measurements gathered during state
// execution. It is meant for debugging and metrics reporting only.
//
// The struct embeds atomic counters, so it should be shared by pointer
// rather than copied.
type Stats struct {
	// Durations, one per category of state work.
	AccountReads    time.Duration // Time spent reading accounts
	StorageReads    time.Duration // Time spent reading storage slots
	CodeReads       time.Duration // Time spent reading contract code
	AccountHashes   time.Duration // Time spent hashing the account trie
	AccountUpdates  time.Duration // Time spent updating the account trie
	StorageUpdates  time.Duration // Time spent updating and hashing storage tries
	HasherCommits   time.Duration // Time spent committing the trie
	DatabaseCommits time.Duration // Time spent committing to the database

	// Account and storage counters for the state transition.
	AccountLoaded  int          // Accounts retrieved from the database
	AccountUpdated int          // Accounts updated
	AccountDeleted int          // Accounts deleted
	StorageLoaded  int          // Storage slots retrieved from the database
	StorageUpdated atomic.Int64 // Storage slots updated
	StorageDeleted atomic.Int64 // Storage slots deleted

	// Contract code counters for the state transition.
	CodeLoaded int // Contract codes loaded

	// CodeLoadBytes counts the bytes read from contract code. The figure
	// may be smaller than the number of bytes actually read, since some
	// APIs (e.g. CodeSize) may load the entire code from either the cache
	// or the database when the size is not available in the cache.
	CodeLoadBytes   int
	CodeUpdated     int // Contracts whose code changes were persisted
	CodeUpdateBytes int // Bytes of persisted code written
}

// StateReadTime reports the combined time spent reading accounts, storage
// slots and contract code.
func (s *Stats) StateReadTime() time.Duration {
	var total time.Duration
	for _, d := range [...]time.Duration{s.AccountReads, s.StorageReads, s.CodeReads} {
		total += d
	}
	return total
}

// StateHashTime reports the combined time spent on account hashing, account
// trie updates and storage trie updates.
func (s *Stats) StateHashTime() time.Duration {
	var total time.Duration
	for _, d := range [...]time.Duration{s.AccountHashes, s.AccountUpdates, s.StorageUpdates} {
		total += d
	}
	return total
}

View file

@ -32,13 +32,8 @@ import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/state/snapshot"
"github.com/ethereum/go-ethereum/core/tracing"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/trie"
"github.com/ethereum/go-ethereum/trie/trienode"
"github.com/ethereum/go-ethereum/triedb"
"github.com/ethereum/go-ethereum/triedb/hashdb"
"github.com/ethereum/go-ethereum/triedb/pathdb"
@ -232,7 +227,7 @@ func TestCopyWithDirtyJournal(t *testing.T) {
for i := byte(0); i < 255; i++ {
obj := orig.getOrNewStateObject(common.BytesToAddress([]byte{i}))
obj.AddBalance(uint256.NewInt(uint64(i)))
obj.data.Root = common.HexToHash("0xdeadbeef")
//obj.data.Root = common.HexToHash("0xdeadbeef")
}
root, _ := orig.Commit(0, true, false)
orig, _ = New(root, db)
@ -275,7 +270,7 @@ func TestCopyObjectState(t *testing.T) {
for i := byte(0); i < 5; i++ {
obj := orig.getOrNewStateObject(common.BytesToAddress([]byte{i}))
obj.AddBalance(uint256.NewInt(uint64(i)))
obj.data.Root = common.HexToHash("0xdeadbeef")
//obj.data.Root = common.HexToHash("0xdeadbeef")
}
orig.Finalise(true)
cpy := orig.Copy()
@ -543,47 +538,6 @@ func (test *snapshotTest) run() bool {
return true
}
// forEachStorage walks the storage trie of the account at addr and invokes
// cb once per slot with the slot key and its current value.
//
// For every key found in the trie, a value present in the state object's
// dirty storage takes precedence over the value stored in the trie. Trie
// entries with an empty value are skipped. Iteration stops early, without
// error, as soon as cb returns false.
//
// A nil error is returned if the account does not exist. Failures to open
// the trie, to create the node iterator, to decode a stored value, or
// errors reported by the iterator itself are returned to the caller.
func forEachStorage(s *StateDB, addr common.Address, cb func(key, value common.Hash) bool) error {
	so := s.getStateObject(addr)
	if so == nil {
		return nil
	}
	tr, err := so.getTrie()
	if err != nil {
		return err
	}
	trieIt, err := tr.NodeIterator(nil)
	if err != nil {
		return err
	}
	it := trie.NewIterator(trieIt)
	for it.Next() {
		key := common.BytesToHash(tr.GetKey(it.Key))

		// Pending in-memory writes shadow whatever the trie holds.
		if value, dirty := so.dirtyStorage[key]; dirty {
			if !cb(key, value) {
				return nil
			}
			continue
		}
		if len(it.Value) == 0 {
			continue
		}
		// Trie values are RLP encoded; strip the envelope before reporting.
		_, content, _, err := rlp.Split(it.Value)
		if err != nil {
			return err
		}
		if !cb(key, common.BytesToHash(content)) {
			return nil
		}
	}
	// Surface iteration failures instead of reporting a truncated walk as
	// a success.
	return it.Err
}
// checkEqual checks that methods of state and checkstate return the same values.
func (test *snapshotTest) checkEqual(state, checkstate *StateDB) error {
for _, addr := range test.addrs {
@ -609,12 +563,6 @@ func (test *snapshotTest) checkEqual(state, checkstate *StateDB) error {
}
// Check storage.
if obj := state.getStateObject(addr); obj != nil {
forEachStorage(state, addr, func(key, value common.Hash) bool {
return checkeq("GetState("+key.Hex()+")", checkstate.GetState(addr, key), value)
})
forEachStorage(checkstate, addr, func(key, value common.Hash) bool {
return checkeq("GetState("+key.Hex()+")", checkstate.GetState(addr, key), value)
})
other := checkstate.getStateObject(addr)
// Check dirty storage which is not in trie
if !maps.Equal(obj.dirtyStorage, other.dirtyStorage) {
@ -773,8 +721,14 @@ func TestCopyCommitCopy(t *testing.T) {
t.Fatalf("second copy committed storage slot mismatch: have %x, want %x", val, common.Hash{})
}
// Commit state, ensure states can be loaded from disk
root, _ := state.Commit(0, false, false)
state, _ = New(root, tdb)
root, err := state.Commit(0, false, false)
if err != nil {
t.Fatalf("commit fail: %v", err)
}
state, err = New(root, tdb)
if err != nil {
t.Fatalf("New fail: %v", err)
}
if balance := state.GetBalance(addr); balance.Cmp(uint256.NewInt(42)) != 0 {
t.Fatalf("state post-commit balance mismatch: have %v, want %v", balance, 42)
}
@ -1269,60 +1223,6 @@ func TestStateDBTransientStorage(t *testing.T) {
}
}
// TestDeleteStorage checks that deleteStorage yields the same set of deleted
// trie nodes for a state backed by a snapshot and for one without it.
func TestDeleteStorage(t *testing.T) {
var (
disk = rawdb.NewMemoryDatabase()
tdb = triedb.NewDatabase(disk, nil)
snaps, _ = snapshot.New(snapshot.Config{CacheSize: 10}, disk, tdb, types.EmptyRootHash)
db = NewDatabase(tdb, nil).WithSnapshot(snaps)
state, _ = New(types.EmptyRootHash, db)
addr = common.HexToAddress("0x1")
)
// Initialize account and populate storage
state.SetBalance(addr, uint256.NewInt(1), tracing.BalanceChangeUnspecified)
state.CreateAccount(addr)
// Write 1000 slots, slot i holding the value 10*i.
for i := 0; i < 1000; i++ {
slot := common.Hash(uint256.NewInt(uint64(i)).Bytes32())
value := common.Hash(uint256.NewInt(uint64(10 * i)).Bytes32())
state.SetState(addr, slot, value)
}
// NOTE(review): the commit error is discarded here, as are the errors of
// the constructors above and below.
root, _ := state.Commit(0, true, false)
// Init phase done, create two states, one with snap and one without
fastState, _ := New(root, NewDatabase(tdb, nil).WithSnapshot(snaps))
slowState, _ := New(root, NewDatabase(tdb, nil))
// The storage root is read once from the snapshot-backed state and used
// for both deletions.
obj := fastState.getOrNewStateObject(addr)
storageRoot := obj.data.Root
_, _, fastNodes, err := fastState.deleteStorage(crypto.Keccak256Hash(addr[:]), storageRoot)
if err != nil {
t.Fatal(err)
}
_, _, slowNodes, err := slowState.deleteStorage(crypto.Keccak256Hash(addr[:]), storageRoot)
if err != nil {
t.Fatal(err)
}
// check asserts that every node in the set is a deletion marker (empty
// hash and empty blob) and returns the hex-encoded node paths joined by
// commas, in the order ForEachWithOrder visits them.
check := func(set *trienode.NodeSet) string {
var a []string
set.ForEachWithOrder(func(path string, n *trienode.Node) {
if n.Hash != (common.Hash{}) {
t.Fatal("delete should have empty hashes")
}
if len(n.Blob) != 0 {
t.Fatal("delete should have empty blobs")
}
a = append(a, fmt.Sprintf("%x", path))
})
return strings.Join(a, ",")
}
// Both code paths must report exactly the same deleted paths.
slowRes := check(slowNodes)
fastRes := check(fastNodes)
if slowRes != fastRes {
t.Fatalf("difference found:\nfast: %v\nslow: %v\n", fastRes, slowRes)
}
}
func TestStorageDirtiness(t *testing.T) {
var (
disk = rawdb.NewMemoryDatabase()
@ -1366,3 +1266,85 @@ func TestStorageDirtiness(t *testing.T) {
state.RevertToSnapshot(snap)
checkDirty(common.Hash{0x1}, common.Hash{0x1}, true)
}
// TestVerkleCodeSizePreserved guards against a regression in the binary-trie
// update path of binaryHasher. The code length used to be derived from
// account.Code, which is only non-nil when the contract code was modified in
// the current block. A balance- or nonce-only change therefore wrote
// codeLen=0 into the BasicData leaf and corrupted the EIP-7864 code_size
// field.
//
// The fix carries the account's total code size through AccountMut.CodeSize,
// populated by the caller from stateObject.CodeSize() at commit time, which
// is authoritative whether or not the code bytes are loaded.
//
// The test compares two routes to the same final state: "create contract,
// commit, reload, change balance, commit" versus a single commit of the
// final state. The roots can only match if the code size survives the
// balance-only commit.
func TestVerkleCodeSizePreserved(t *testing.T) {
	// freshState builds an empty verkle-configured state on top of a new
	// in-memory database and also hands back the underlying trie database.
	freshState := func(t *testing.T) (*StateDB, *triedb.Database) {
		t.Helper()

		trieDB := triedb.NewDatabase(rawdb.NewMemoryDatabase(), triedb.VerkleDefaults)

		// The disk layer of a fresh verkle pathdb is keyed by
		// EmptyVerkleHash (the all-zero hash) rather than EmptyRootHash;
		// picking the wrong one fails at commit with
		// "triedb parent layer missing".
		st, err := New(types.EmptyVerkleHash, NewDatabase(trieDB, nil))
		if err != nil {
			t.Fatalf("failed to initialize state: %v", err)
		}
		return st, trieDB
	}
	contract := common.HexToAddress("0xdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef")

	// Use a non-trivial code length so that the code size actually matters.
	bytecode := make([]byte, 1234)
	for i := 0; i < len(bytecode); i++ {
		bytecode[i] = byte(i)
	}

	// Path A: create the contract, commit, reload, touch only the balance
	// and commit again. During the second commit the code is not loaded
	// (dirtyCode=false), which is where the old implementation derived a
	// zero code length. The triedb layers are kept in memory (no
	// tdb.Commit) so a second block can be chained on top of the first.
	chained, trieDBA := freshState(t)
	reloadDB := NewDatabase(trieDBA, nil)

	chained.SetBalance(contract, uint256.NewInt(100), tracing.BalanceChangeUnspecified)
	chained.SetCode(contract, bytecode, tracing.CodeChangeUnspecified)
	firstRoot, err := chained.Commit(0, true, false)
	if err != nil {
		t.Fatalf("path A first commit: %v", err)
	}
	if chained, err = New(firstRoot, reloadDB); err != nil {
		t.Fatalf("path A reload: %v", err)
	}
	chained.SetBalance(contract, uint256.NewInt(200), tracing.BalanceChangeUnspecified)
	chainedRoot, err := chained.Commit(1, true, false)
	if err != nil {
		t.Fatalf("path A second commit: %v", err)
	}

	// Path B: build the identical final state (balance=200 plus code) in a
	// single block. SetCode has just been called, so the code is loaded
	// and the code size is known to be right: this is the reference.
	oneShot, _ := freshState(t)
	oneShot.SetBalance(contract, uint256.NewInt(200), tracing.BalanceChangeUnspecified)
	oneShot.SetCode(contract, bytecode, tracing.CodeChangeUnspecified)
	referenceRoot, err := oneShot.Commit(0, true, false)
	if err != nil {
		t.Fatalf("path B commit: %v", err)
	}

	if chainedRoot != referenceRoot {
		t.Fatalf("state root mismatch after balance-only update:\n path A (reload + balance): %x\n path B (fresh, same final state): %x\n regression: binaryHasher.updateAccount used len(account.Code.Code)=0 because code was not modified",
			chainedRoot, referenceRoot)
	}
}

View file

@ -17,6 +17,7 @@
package state
import (
"errors"
"fmt"
"maps"
@ -29,71 +30,75 @@ import (
"github.com/ethereum/go-ethereum/triedb"
)
// contractCode represents contract bytecode along with its associated metadata.
// contractCode encapsulates contract bytecode and its associated metadata.
type contractCode struct {
hash common.Hash // hash is the cryptographic hash of the current contract code.
blob []byte // blob is the binary representation of the current contract code.
originHash common.Hash // originHash is the cryptographic hash of the code before mutation.
originHash common.Hash // originHash is the cryptographic hash of the code prior to mutation.
blob []byte // blob is the raw byte representation of the current contract code.
// Derived fields, populated only when state tracking is enabled.
duplicate bool // duplicate indicates whether the updated code already exists.
originBlob []byte // originBlob is the original binary representation of the contract code.
originBlob []byte // originBlob is the original byte representation of the contract code.
}
// accountDelete represents an operation for deleting an Ethereum account.
// accountDelete represents a deletion operation for an Ethereum account.
type accountDelete struct {
address common.Address // address is the unique account identifier
origin []byte // origin is the original value of account data in slim-RLP encoding.
address common.Address // address uniquely identifies the account.
origin Account // origin is the account state prior to deletion.
// storages stores mutated slots, the value should be nil.
storages map[common.Hash][]byte
// storagesOrigin stores the original values of mutated slots in
// prefix-zero-trimmed RLP format. The map key refers to the **HASH**
// of the raw storage slot key.
storagesOrigin map[common.Hash][]byte
storages map[common.Hash]common.Hash // storages contains mutated storage slots.
storagesOrigin map[common.Hash]common.Hash // storagesOrigin holds original values of mutated slots; keys are hashes of raw storage slot keys.
}
// accountUpdate represents an operation for updating an Ethereum account.
// accountUpdate represents an update operation for an Ethereum account.
type accountUpdate struct {
address common.Address // address is the unique account identifier
data []byte // data is the slim-RLP encoded account data.
origin []byte // origin is the original value of account data in slim-RLP encoding.
code *contractCode // code represents mutated contract code; nil means it's not modified.
storages map[common.Hash][]byte // storages stores mutated slots in prefix-zero-trimmed RLP format.
address common.Address // address uniquely identifies the account.
data *Account // data is the updated account state; nil indicates deletion.
origin *Account // origin is the previous account state; nil indicates non-existence.
code *contractCode // code contains updated contract code; nil if unchanged.
storages map[common.Hash]common.Hash // storages contains updated storage slots.
// storagesOriginByKey and storagesOriginByHash both store the original values
// of mutated slots in prefix-zero-trimmed RLP format. The difference is that
// storagesOriginByKey uses the **raw** storage slot key as the map ID, while
// storagesOriginByHash uses the **hash** of the storage slot key instead.
storagesOriginByKey map[common.Hash][]byte
storagesOriginByHash map[common.Hash][]byte
// storagesOriginByKey and storagesOriginByHash both record original values
// of mutated storage slots:
// - storagesOriginByKey uses raw storage slot keys.
// - storagesOriginByHash uses hashed storage slot keys.
storagesOriginByKey map[common.Hash]common.Hash
storagesOriginByHash map[common.Hash]common.Hash
}
// stateUpdate represents the difference between two states resulting from state
// execution. It contains information about mutated contract codes, accounts,
// and storage slots, along with their original values.
// stateUpdate captures the difference between two states resulting from
// execution. It records all mutated accounts, contract codes, and storage
// slots, along with their original values.
type stateUpdate struct {
originRoot common.Hash // hash of the state before applying mutation
root common.Hash // hash of the state after applying mutation
blockNumber uint64 // Associated block number
originRoot common.Hash // originRoot is the state root before applying changes.
root common.Hash // root is the state root after applying changes.
blockNumber uint64 // blockNumber is the associated block height.
accounts map[common.Hash][]byte // accounts stores mutated accounts in 'slim RLP' encoding
accountsOrigin map[common.Address][]byte // accountsOrigin stores the original values of mutated accounts in 'slim RLP' encoding
accounts map[common.Hash]*Account // accounts contains mutated accounts, keyed by account hash.
accountsOrigin map[common.Address]*Account // accountsOrigin holds original values of mutated accounts, keyed by address.
// storages stores mutated slots in 'prefix-zero-trimmed' RLP format.
// The value is keyed by account hash and **storage slot key hash**.
storages map[common.Hash]map[common.Hash][]byte
// storages contains mutated storage slots, keyed by account hash and
// storage slot key hash.
storages map[common.Hash]map[common.Hash]common.Hash
// storagesOrigin stores the original values of mutated slots in
// 'prefix-zero-trimmed' RLP format.
// (a) the value is keyed by account hash and **storage slot key** if rawStorageKey is true;
// (b) the value is keyed by account hash and **storage slot key hash** if rawStorageKey is false;
storagesOrigin map[common.Address]map[common.Hash][]byte
// storagesOrigin holds original values of mutated storage slots.
// The key format depends on rawStorageKey:
// - if true: keyed by account address and raw storage slot key.
// - if false: keyed by account address and storage slot key hash.
storagesOrigin map[common.Address]map[common.Hash]common.Hash
rawStorageKey bool
codes map[common.Address]*contractCode // codes contains the set of dirty codes
nodes *trienode.MergedNodeSet // Aggregated dirty nodes caused by state changes
codes map[common.Address]*contractCode // codes contains mutated contract codes, keyed by address.
nodes *trienode.MergedNodeSet // nodes aggregates all dirty trie nodes produced by the update.
secondaryHashes map[common.Address]Hashes // hashes of secondary tries
// leaves is the ordered list of stem-offset writes harvested from a
// LeafProducer-capable hasher (the binary hasher). For merkle hashers
// it is always nil; for the binary hasher it is the bintrie's view of
// the same state mutations the trie just absorbed, in flat-state form.
// encodeBinary turns this into the per-offset accountData map that
// pathdb's bintrie codec consumes at flush time.
leaves []StemWrite
}
// empty returns a flag indicating the state transition is empty or not.
@ -107,12 +112,16 @@ func (sc *stateUpdate) empty() bool {
//
// rawStorageKey is a flag indicating whether to use the raw storage slot key or
// the hash of the slot key for constructing state update object.
func newStateUpdate(rawStorageKey bool, originRoot common.Hash, root common.Hash, blockNumber uint64, deletes map[common.Hash]*accountDelete, updates map[common.Hash]*accountUpdate, nodes *trienode.MergedNodeSet) *stateUpdate {
//
// leaves carries the per-offset stem writes produced by a LeafProducer-capable
// hasher (the binary hasher). It is nil for merkle hashers and consumed by
// encodeBinary to populate the bintrie flat-state map.
func newStateUpdate(rawStorageKey bool, originRoot common.Hash, root common.Hash, blockNumber uint64, deletes map[common.Hash]*accountDelete, updates map[common.Hash]*accountUpdate, nodes *trienode.MergedNodeSet, secondaryHashes map[common.Address]Hashes, leaves []StemWrite) *stateUpdate {
var (
accounts = make(map[common.Hash][]byte)
accountsOrigin = make(map[common.Address][]byte)
storages = make(map[common.Hash]map[common.Hash][]byte)
storagesOrigin = make(map[common.Address]map[common.Hash][]byte)
accounts = make(map[common.Hash]*Account)
accountsOrigin = make(map[common.Address]*Account)
storages = make(map[common.Hash]map[common.Hash]common.Hash)
storagesOrigin = make(map[common.Address]map[common.Hash]common.Hash)
codes = make(map[common.Address]*contractCode)
)
// Since some accounts might be destroyed and recreated within the same
@ -120,7 +129,7 @@ func newStateUpdate(rawStorageKey bool, originRoot common.Hash, root common.Hash
for addrHash, op := range deletes {
addr := op.address
accounts[addrHash] = nil
accountsOrigin[addr] = op.origin
accountsOrigin[addr] = &op.origin
// If storage wiping exists, the hash of the storage slot key must be used
if len(op.storages) > 0 {
@ -174,31 +183,168 @@ func newStateUpdate(rawStorageKey bool, originRoot common.Hash, root common.Hash
}
}
return &stateUpdate{
originRoot: originRoot,
root: root,
blockNumber: blockNumber,
accounts: accounts,
accountsOrigin: accountsOrigin,
storages: storages,
storagesOrigin: storagesOrigin,
rawStorageKey: rawStorageKey,
codes: codes,
nodes: nodes,
originRoot: originRoot,
root: root,
blockNumber: blockNumber,
accounts: accounts,
accountsOrigin: accountsOrigin,
storages: storages,
storagesOrigin: storagesOrigin,
rawStorageKey: rawStorageKey,
codes: codes,
nodes: nodes,
secondaryHashes: secondaryHashes,
leaves: leaves,
}
}
// encodeSlot returns the RLP encoding of the storage value with its leading
// zero bytes trimmed. The all-zero value is encoded as nil.
func encodeSlot(val common.Hash) []byte {
	var zero common.Hash
	if val != zero {
		trimmed := common.TrimLeftZeroes(val[:])
		// The encoder's error is deliberately dropped, as before.
		enc, _ := rlp.EncodeToBytes(trimmed)
		return enc
	}
	return nil
}
// encodeMerkle converts the state update into the byte-encoded form used for
// merkle state: accounts in slim-RLP encoding and storage slots encoded via
// encodeSlot.
//
// It returns, in order: the mutated accounts keyed by account hash, the
// original accounts keyed by address, the mutated storage slots keyed by
// account hash, and the original storage slots keyed by address. Accounts
// are enumerated from accountsOrigin; a nil entry on either side is kept as
// nil. The storage root of an encoded account is taken from secondaryHashes
// (Prev for the original account, Hash for the mutated one), and an error
// is returned if a non-nil account has no such entry.
func (sc *stateUpdate) encodeMerkle() (map[common.Hash][]byte, map[common.Address][]byte, map[common.Hash]map[common.Hash][]byte, map[common.Address]map[common.Hash][]byte, error) {
	var (
		accounts      = make(map[common.Hash][]byte)
		storages      = make(map[common.Hash]map[common.Hash][]byte)
		accountOrigin = make(map[common.Address][]byte)
		storageOrigin = make(map[common.Address]map[common.Hash][]byte)
	)
	// slim renders the account in slim-RLP form using the supplied root.
	slim := func(acct *Account, root common.Hash) []byte {
		return types.SlimAccountRLP(types.StateAccount{
			Balance:  acct.Balance,
			Nonce:    acct.Nonce,
			CodeHash: acct.CodeHash,
			Root:     root,
		})
	}
	// encodeSlots encodes every value of a slot set, keeping the keys.
	encodeSlots := func(slots map[common.Hash]common.Hash) map[common.Hash][]byte {
		encoded := make(map[common.Hash][]byte)
		for key, val := range slots {
			encoded[key] = encodeSlot(val)
		}
		return encoded
	}
	for addr, prev := range sc.accountsOrigin {
		var (
			addrHash = crypto.Keccak256Hash(addr.Bytes())
			data     = sc.accounts[addrHash]
		)
		// The storage roots are only required if there is at least one
		// account version to encode.
		roots, found := sc.secondaryHashes[addr]
		if !found && (prev != nil || data != nil) {
			return nil, nil, nil, nil, errors.New("no secondary hash")
		}
		switch prev {
		case nil:
			accountOrigin[addr] = nil
		default:
			accountOrigin[addr] = slim(prev, roots.Prev)
		}
		switch data {
		case nil:
			accounts[addrHash] = nil
		default:
			accounts[addrHash] = slim(data, roots.Hash)
		}
	}
	for addr, slots := range sc.storagesOrigin {
		storageOrigin[addr] = encodeSlots(slots)
	}
	for addrHash, slots := range sc.storages {
		storages[addrHash] = encodeSlots(slots)
	}
	return accounts, accountOrigin, storages, storageOrigin, nil
}
// encodeBinary builds the bintrie flat-state representation consumed by
// pathdb. Where encodeMerkle keys accounts and storage by keccak hashes and
// slim-RLP encodes the values, this path emits one entry per EIP-7864 leaf:
//
//	key   = stem(31B) || offset(1B), zero-padded into a common.Hash
//	value = the 32-byte leaf payload, or nil to clear the offset
//
// Account header writes (BasicData at offset 0, CodeHash at offset 1),
// storage slots and code chunks are all treated alike: each stem write
// harvested from the binary hasher lands in the accounts map. The storages
// map is returned empty, since bintrie has no per-account storage grouping
// at the flat-state layer and pathdb's disklayer and lookup tree operate on
// a single accountData map of 32-byte keys.
//
// accountOrigin and storageOrigin are returned empty as well: state-history
// rollback is not yet supported for bintrie, and the pathdb disklayer.revert
// guard rejects bintrie reverts before these maps would be consulted.
//
// An error is returned if a non-nil leaf value is not exactly 32 bytes long.
func (sc *stateUpdate) encodeBinary() (map[common.Hash][]byte, map[common.Address][]byte, map[common.Hash]map[common.Hash][]byte, map[common.Address]map[common.Hash][]byte, error) {
	var (
		accounts      = make(map[common.Hash][]byte, len(sc.leaves))
		storages      = make(map[common.Hash]map[common.Hash][]byte)
		accountOrigin = make(map[common.Address][]byte)
		storageOrigin = make(map[common.Address]map[common.Hash][]byte)
	)
	for _, w := range sc.leaves {
		// Assemble the flat key: the stem followed by the offset byte.
		var (
			fullKey common.Hash
			stemLen = len(w.Stem)
		)
		copy(fullKey[:stemLen], w.Stem[:])
		fullKey[stemLen] = w.Offset

		switch {
		case w.Value == nil:
			// A nil value clears the offset (account delete or storage
			// slot wipe). The pathdb codec treats a nil entry as a delete
			// during flush, matching merkle's nil-blob convention.
			accounts[fullKey] = nil

		case len(w.Value) != 32:
			// Every non-nil bintrie leaf must be exactly 32 bytes. Reject
			// a malformed leaf here, at the trust boundary, instead of
			// letting it corrupt the diff layer or trip the
			// stemBuilder.set panic deep in the flush path.
			return nil, nil, nil, nil, fmt.Errorf("bintrie leaf at stem %x offset %d has value len %d, want 32", w.Stem, w.Offset, len(w.Value))

		default:
			// Keep an owning copy: the hasher reuses its buffers across
			// blocks, so holding on to its slice would alias data between
			// blocks in the pathdb diff layer.
			owned := make([]byte, 32)
			copy(owned, w.Value)
			accounts[fullKey] = owned
		}
	}
	return accounts, accountOrigin, storages, storageOrigin, nil
}
// stateSet converts the current stateUpdate object into a triedb.StateSet
// object. This function extracts the necessary data from the stateUpdate
// struct and formats it into the StateSet structure consumed by the triedb
// package.
func (sc *stateUpdate) stateSet() *triedb.StateSet {
return &triedb.StateSet{
Accounts: sc.accounts,
AccountsOrigin: sc.accountsOrigin,
Storages: sc.storages,
StoragesOrigin: sc.storagesOrigin,
RawStorageKey: sc.rawStorageKey,
// stateSet converts the state update into a triedb.StateSet. The account
// and storage data is encoded with encodeMerkle when isMerkle is set and
// with encodeBinary otherwise; an encoding failure is returned as is.
func (sc *stateUpdate) stateSet(isMerkle bool) (*triedb.StateSet, error) {
	// Pick the encoder up front; both share the same signature.
	encode := sc.encodeBinary
	if isMerkle {
		encode = sc.encodeMerkle
	}
	accounts, accountOrigin, storages, storageOrigin, err := encode()
	if err != nil {
		return nil, err
	}
	set := &triedb.StateSet{
		Accounts:       accounts,
		AccountsOrigin: accountOrigin,
		Storages:       storages,
		StoragesOrigin: storageOrigin,
		RawStorageKey:  sc.rawStorageKey,
	}
	return set, nil
}
// deriveCodeFields derives the missing fields of contract code changes
@ -246,30 +392,33 @@ func (sc *stateUpdate) ToTracingUpdate() (*tracing.StateUpdate, error) {
if !exists {
return nil, fmt.Errorf("account %x not found", addr)
}
var hashes Hashes
if sc.secondaryHashes != nil {
var ok bool
hashes, ok = sc.secondaryHashes[addr]
if !ok {
return nil, fmt.Errorf("ToTracingUpdate: missing secondary hash for %x", addr)
}
} else {
// Bintrie: no per-account storage sub-tries, use empty root.
hashes = Hashes{Hash: types.EmptyRootHash, Prev: types.EmptyRootHash}
}
change := &tracing.AccountChange{}
if len(oldData) > 0 {
acct, err := types.FullAccount(oldData)
if err != nil {
return nil, err
}
if oldData != nil {
change.Prev = &types.StateAccount{
Nonce: acct.Nonce,
Balance: acct.Balance,
Root: acct.Root,
CodeHash: acct.CodeHash,
Nonce: oldData.Nonce,
Balance: oldData.Balance,
Root: hashes.Prev,
CodeHash: oldData.CodeHash,
}
}
if len(newData) > 0 {
acct, err := types.FullAccount(newData)
if err != nil {
return nil, err
}
if newData != nil {
change.New = &types.StateAccount{
Nonce: acct.Nonce,
Balance: acct.Balance,
Root: acct.Root,
CodeHash: acct.CodeHash,
Nonce: newData.Nonce,
Balance: newData.Balance,
Root: hashes.Hash,
CodeHash: newData.CodeHash,
}
}
update.AccountChanges[addr] = change
@ -284,40 +433,24 @@ func (sc *stateUpdate) ToTracingUpdate() (*tracing.StateUpdate, error) {
}
storageChanges := make(map[common.Hash]*tracing.StorageChange, len(slots))
for key, encPrev := range slots {
for key, prev := range slots {
// Get new value - handle both raw and hashed key formats
var (
exists bool
encNew []byte
decPrev []byte
decNew []byte
err error
current common.Hash
)
if sc.rawStorageKey {
encNew, exists = subset[crypto.Keccak256Hash(key.Bytes())]
current, exists = subset[crypto.Keccak256Hash(key.Bytes())]
} else {
encNew, exists = subset[key]
current, exists = subset[key]
}
if !exists {
return nil, fmt.Errorf("storage slot %x-%x not found", addr, key)
}
// Decode the prev and new values
if len(encPrev) > 0 {
_, decPrev, _, err = rlp.Split(encPrev)
if err != nil {
return nil, fmt.Errorf("failed to decode prevValue: %v", err)
}
}
if len(encNew) > 0 {
_, decNew, _, err = rlp.Split(encNew)
if err != nil {
return nil, fmt.Errorf("failed to decode newValue: %v", err)
}
}
storageChanges[key] = &tracing.StorageChange{
Prev: common.BytesToHash(decPrev),
New: common.BytesToHash(decNew),
Prev: prev,
New: current,
}
}
update.StorageChanges[addr] = storageChanges

View file

@ -22,451 +22,225 @@ import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/metrics"
)
var (
// triePrefetchMetricsPrefix is the prefix under which to publish the metrics.
triePrefetchMetricsPrefix = "trie/prefetch/"
var errTerminated = errors.New("fetcher is already terminated")
// errTerminated is returned if a fetcher is attempted to be operated after it
// has already terminated.
errTerminated = errors.New("fetcher is already terminated")
// slotKey identifies a single storage slot by its owning account address and
// the slot key. It is used as the key type of the prefetcher's seenReadSlot
// and seenWriteSlot dedup maps.
type slotKey struct {
addr common.Address // Account that the slot belongs to
slot common.Hash // Key of the storage slot
}
// taskKind tags a prefetchTask with the kind of state item it carries.
type taskKind uint8
const (
// kindAccount marks a task carrying account addresses to prefetch.
kindAccount taskKind = iota
// kindStorage marks a task carrying storage slots of a single account.
kindStorage
)
// triePrefetcher is an active prefetcher, which receives accounts or storage
// items and does trie-loading of them. The goal is to get as much useful content
// into the caches as possible.
//
// Note, the prefetcher's API is not thread safe.
type triePrefetcher struct {
verkle bool // Flag whether the prefetcher is in verkle mode
db Database // Database to fetch trie nodes through
root common.Hash // Root hash of the account trie for metrics
fetchers map[string]*subfetcher // Subfetchers for each trie
term chan struct{} // Channel to signal interruption
noreads bool // Whether to ignore state-read-only prefetch requests
type prefetchTask struct {
read bool
kind taskKind
deliveryMissMeter *metrics.Meter
accountLoadReadMeter *metrics.Meter
accountLoadWriteMeter *metrics.Meter
accountDupReadMeter *metrics.Meter
accountDupWriteMeter *metrics.Meter
accountDupCrossMeter *metrics.Meter
accountWasteMeter *metrics.Meter
storageLoadReadMeter *metrics.Meter
storageLoadWriteMeter *metrics.Meter
storageDupReadMeter *metrics.Meter
storageDupWriteMeter *metrics.Meter
storageDupCrossMeter *metrics.Meter
storageWasteMeter *metrics.Meter
accounts []common.Address // kindAccount: addresses to prefetch
account common.Address // kindStorage: owner address
slots []common.Hash // kindStorage: slot keys to prefetch
}
// newTriePrefetcher creates a prefetcher for the state identified by root,
// loading trie nodes through db. All meters are registered under
// triePrefetchMetricsPrefix followed by namespace. The noreads flag is stored
// on the prefetcher and makes it ignore read-only prefetch requests.
func newTriePrefetcher(db Database, root common.Hash, namespace string, noreads bool) *triePrefetcher {
	base := triePrefetchMetricsPrefix + namespace

	// meter fetches (or registers) the meter published under base+suffix.
	meter := func(suffix string) *metrics.Meter {
		return metrics.GetOrRegisterMeter(base+suffix, nil)
	}
	p := &triePrefetcher{
		verkle:   db.TrieDB().IsVerkle(),
		db:       db,
		root:     root,
		fetchers: make(map[string]*subfetcher), // Active prefetchers use the fetchers map
		term:     make(chan struct{}),
		noreads:  noreads,

		deliveryMissMeter: meter("/deliverymiss"),

		accountLoadReadMeter:  meter("/account/load/read"),
		accountLoadWriteMeter: meter("/account/load/write"),
		accountDupReadMeter:   meter("/account/dup/read"),
		accountDupWriteMeter:  meter("/account/dup/write"),
		accountDupCrossMeter:  meter("/account/dup/cross"),
		accountWasteMeter:     meter("/account/waste"),

		storageLoadReadMeter:  meter("/storage/load/read"),
		storageLoadWriteMeter: meter("/storage/load/write"),
		storageDupReadMeter:   meter("/storage/dup/read"),
		storageDupWriteMeter:  meter("/storage/dup/write"),
		storageDupCrossMeter:  meter("/storage/dup/cross"),
		storageWasteMeter:     meter("/storage/waste"),
	}
	return p
}
// terminate forwards a termination request to every subfetcher and then marks
// the prefetcher itself as closed. The async flag is passed through to each
// subfetcher: when set the call returns without waiting for them, otherwise it
// blocks until all of them have spun down. Terminating an already closed
// prefetcher is a no-op.
func (p *triePrefetcher) terminate(async bool) {
	select {
	case <-p.term:
		// Already closed, nothing left to do
	default:
		// Stop all sub-fetchers, sync or async, then flag the closure
		for _, sub := range p.fetchers {
			sub.terminate(async)
		}
		close(p.term)
	}
}
// report publishes the load, duplication and waste statistics gathered by
// every subfetcher. The subfetcher whose root equals the state root feeds the
// account meters, all others feed the storage meters. Items marked as used are
// removed from the subfetcher's seen sets, so whatever remains is counted as
// waste. Nothing is reported when metrics are disabled.
func (p *triePrefetcher) report() {
	if !metrics.Enabled() {
		return
	}
	for _, sub := range p.fetchers {
		sub.wait() // the subfetcher must be idle before its internals are touched

		// Anything not rooted at the state root is accounted as storage
		if sub.root != p.root {
			p.storageLoadReadMeter.Mark(int64(len(sub.seenReadSlot)))
			p.storageLoadWriteMeter.Mark(int64(len(sub.seenWriteSlot)))

			p.storageDupReadMeter.Mark(int64(sub.dupsRead))
			p.storageDupWriteMeter.Mark(int64(sub.dupsWrite))
			p.storageDupCrossMeter.Mark(int64(sub.dupsCross))

			for _, slot := range sub.usedSlot {
				delete(sub.seenReadSlot, slot)
				delete(sub.seenWriteSlot, slot)
			}
			p.storageWasteMeter.Mark(int64(len(sub.seenReadSlot) + len(sub.seenWriteSlot)))
			continue
		}
		// Account trie statistics
		p.accountLoadReadMeter.Mark(int64(len(sub.seenReadAddr)))
		p.accountLoadWriteMeter.Mark(int64(len(sub.seenWriteAddr)))

		p.accountDupReadMeter.Mark(int64(sub.dupsRead))
		p.accountDupWriteMeter.Mark(int64(sub.dupsWrite))
		p.accountDupCrossMeter.Mark(int64(sub.dupsCross))

		for _, addr := range sub.usedAddr {
			delete(sub.seenReadAddr, addr)
			delete(sub.seenWriteAddr, addr)
		}
		p.accountWasteMeter.Mark(int64(len(sub.seenReadAddr) + len(sub.seenWriteAddr)))
	}
}
// prefetch schedules a batch of trie items to prefetch on the subfetcher that
// serves the trie identified by owner and root, creating that subfetcher on
// first use. Once the prefetcher has been closed no further tasks are accepted
// and errTerminated is returned. Read-only requests are silently dropped when
// the prefetcher was created with noreads.
//
// prefetch is called from two locations:
//
//  1. Finalize of the state-objects storage roots. This happens at the end
//     of every transaction, meaning that if several transactions touch the
//     same contract, the parameters passed to this method may be repeated.
//  2. Finalize of the main account trie. This happens only once per block.
func (p *triePrefetcher) prefetch(owner common.Hash, root common.Hash, addr common.Address, addrs []common.Address, slots []common.Hash, read bool) error {
	// Drop read-only requests outright if reads are disabled
	if p.noreads && read {
		return nil
	}
	// Refuse new work once the prefetcher has been closed
	select {
	case <-p.term:
		return errTerminated
	default:
	}
	// Locate the subfetcher of this trie, spinning one up if it's missing
	id := p.trieID(owner, root)
	if p.fetchers[id] == nil {
		p.fetchers[id] = newSubfetcher(p.db, p.root, owner, root, addr)
	}
	return p.fetchers[id].schedule(addrs, slots, read)
}
// trie returns the trie held by the subfetcher registered for the given owner
// and root, blocking until that subfetcher has terminated. If no subfetcher
// exists for the request, the miss is logged and metered and nil is returned.
func (p *triePrefetcher) trie(owner common.Hash, root common.Hash) Trie {
	// A subfetcher exists, hand out its trie
	if fetcher := p.fetchers[p.trieID(owner, root)]; fetcher != nil {
		return fetcher.peek()
	}
	// Nothing was prefetched for this trie
	log.Error("Prefetcher missed to load trie", "owner", owner, "root", root)
	p.deliveryMissMeter.Mark(1)
	return nil
}
// used records a batch of state items as actually used, so that report can
// tell useful prefetches from wasted ones. Requests for a trie without a
// subfetcher are ignored.
func (p *triePrefetcher) used(owner common.Hash, root common.Hash, usedAddr []common.Address, usedSlot []common.Hash) {
	fetcher := p.fetchers[p.trieID(owner, root)]
	if fetcher == nil {
		return
	}
	fetcher.wait() // the subfetcher must be idle before its internals are touched

	fetcher.usedAddr = append(fetcher.usedAddr, usedAddr...)
	fetcher.usedSlot = append(fetcher.usedSlot, usedSlot...)
}
// trieID returns a unique trie identifier. In verkle mode the identifier is
// the hex form of the prefetcher's state root and both arguments are ignored.
// In merkle mode it is the owner hash followed by the trie root, which covers
// both the account trie and the storage tries.
func (p *triePrefetcher) trieID(owner common.Hash, root common.Hash) string {
	// The trie in verkle is only identified by state root
	if p.verkle {
		return p.root.Hex()
	}
	// Concatenate owner and root into a fixed-size buffer
	var id [common.HashLength * 2]byte
	copy(id[:common.HashLength], owner.Bytes())
	copy(id[common.HashLength:], root.Bytes())
	return string(id[:])
}
// subfetcher is a trie fetcher goroutine responsible for pulling entries for a
// single trie. It is spawned when a new root is encountered and lives until the
// main prefetcher is paused and either all requested items are processed or if
// the trie being worked on is retrieved from the prefetcher.
type subfetcher struct {
db Database // Database to load trie nodes through
state common.Hash // Root hash of the state to prefetch
owner common.Hash // Owner of the trie, usually account hash
root common.Hash // Root hash of the trie to prefetch
addr common.Address // Address of the account that the trie belongs to
trie Trie // Trie being populated with nodes
tasks []*subfetcherTask // Items queued up for retrieval
lock sync.Mutex // Lock protecting the task queue
// prefetcher is a background goroutine that preloads trie nodes for a single
// trie. It deduplicates requests and stops when explicitly terminated.
type prefetcher struct {
prefetchRead bool // Whether the state read will trigger preloading
trie Trie // Trie being populated with nodes
tasks []*prefetchTask // Items queued up for retrieval
lock sync.Mutex // Lock protecting the task queue
wake chan struct{} // Wake channel if a new task is scheduled
stop chan struct{} // Channel to interrupt processing
term chan struct{} // Channel to signal interruption
seenReadAddr map[common.Address]struct{} // Tracks the accounts already loaded via read operations
seenWriteAddr map[common.Address]struct{} // Tracks the accounts already loaded via write operations
seenReadSlot map[common.Hash]struct{} // Tracks the storage already loaded via read operations
seenWriteSlot map[common.Hash]struct{} // Tracks the storage already loaded via write operations
dupsRead int // Number of duplicate preload tasks via reads only
dupsWrite int // Number of duplicate preload tasks via writes only
dupsCross int // Number of duplicate preload tasks via read-write-crosses
usedAddr []common.Address // Tracks the accounts used in the end
usedSlot []common.Hash // Tracks the storage used in the end
seenReadAddr map[common.Address]struct{} // Dedup: accounts loaded via reads
seenWriteAddr map[common.Address]struct{} // Dedup: accounts loaded via writes
seenReadSlot map[slotKey]struct{} // Dedup: slots loaded via reads
seenWriteSlot map[slotKey]struct{} // Dedup: slots loaded via writes
}
// subfetcherTask is a trie path to prefetch, tagged with whether it originates
// from a read or a write request. The subfetcher loop treats a task with a
// non-nil addr as an account task and every other task as a storage slot task.
type subfetcherTask struct {
read bool // Whether the task originates from a read (true) or a write (false)
addr *common.Address // Account to prefetch; nil for storage slot tasks
slot *common.Hash // Storage slot to prefetch; only consulted when addr is nil
}
// newSubfetcher creates a goroutine to prefetch state items belonging to a
// particular root hash.
func newSubfetcher(db Database, state common.Hash, owner common.Hash, root common.Hash, addr common.Address) *subfetcher {
sf := &subfetcher{
db: db,
state: state,
owner: owner,
root: root,
addr: addr,
// newPrefetcher creates a background goroutine to prefetch state items from the
// given trie.
func newPrefetcher(tr Trie, prefetchRead bool) *prefetcher {
p := &prefetcher{
prefetchRead: prefetchRead,
trie: tr,
wake: make(chan struct{}, 1),
stop: make(chan struct{}),
term: make(chan struct{}),
seenReadAddr: make(map[common.Address]struct{}),
seenWriteAddr: make(map[common.Address]struct{}),
seenReadSlot: make(map[common.Hash]struct{}),
seenWriteSlot: make(map[common.Hash]struct{}),
seenReadSlot: make(map[slotKey]struct{}),
seenWriteSlot: make(map[slotKey]struct{}),
}
go sf.loop()
return sf
go p.loop()
return p
}
// schedule adds a batch of trie keys to the queue to prefetch.
func (sf *subfetcher) schedule(addrs []common.Address, slots []common.Hash, read bool) error {
// Ensure the subfetcher is still alive
// scheduleAccounts adds a batch of accounts to the prefetch queue.
func (p *prefetcher) scheduleAccounts(addrs []common.Address, read bool) error {
select {
case <-sf.term:
case <-p.term:
return errTerminated
default:
}
// Append the tasks to the current queue
sf.lock.Lock()
for _, addr := range addrs {
sf.tasks = append(sf.tasks, &subfetcherTask{read: read, addr: &addr})
if !p.prefetchRead && read {
return nil
}
for _, slot := range slots {
sf.tasks = append(sf.tasks, &subfetcherTask{read: read, slot: &slot})
}
sf.lock.Unlock()
p.lock.Lock()
p.tasks = append(p.tasks, &prefetchTask{
read: read,
kind: kindAccount,
accounts: addrs,
})
p.lock.Unlock()
// Notify the background thread to execute scheduled tasks
select {
case sf.wake <- struct{}{}:
// Wake signal sent
case p.wake <- struct{}{}:
default:
// Wake signal not sent as a previous one is already queued
}
return nil
}
// wait blocks until the subfetcher terminates, i.e. until sf.term is closed.
// This method is used to block on an async termination before accessing
// internal fields from the fetcher.
func (sf *subfetcher) wait() {
<-sf.term
}
// peek retrieves the fetcher's trie, populated with any pre-fetched data. The
// fetcher's own trie value is returned as-is (no copy is made here), so
// modifying it will break subsequent peeks for the original data. The method
// will block until the fetcher has terminated.
func (sf *subfetcher) peek() Trie {
// Block until the fetcher terminates, then retrieve the trie
sf.wait()
return sf.trie
}
// terminate requests the subfetcher to stop accepting new tasks and spin down
// as soon as everything is loaded. Depending on the async parameter, the method
// will either block until all disk loads finish or return immediately.
func (sf *subfetcher) terminate(async bool) {
// scheduleSlots adds a batch of storage slots to the prefetch queue.
func (p *prefetcher) scheduleSlots(addr common.Address, slots []common.Hash, read bool) error {
select {
case <-sf.stop:
case <-p.term:
return errTerminated
default:
close(sf.stop)
}
if async {
return
if !p.prefetchRead && read {
return nil
}
<-sf.term
}
p.lock.Lock()
p.tasks = append(p.tasks, &prefetchTask{
read: read,
kind: kindStorage,
account: addr,
slots: slots,
})
p.lock.Unlock()
// openTrie resolves the target trie from database for prefetching.
func (sf *subfetcher) openTrie() error {
// Open the verkle tree if the sub-fetcher is in verkle mode. Note, there is
// only a single fetcher for verkle.
if sf.db.TrieDB().IsVerkle() {
tr, err := sf.db.OpenTrie(sf.state)
if err != nil {
log.Warn("Trie prefetcher failed opening verkle trie", "root", sf.root, "err", err)
return err
}
sf.trie = tr
return nil
select {
case p.wake <- struct{}{}:
default:
}
// Open the merkle tree if the sub-fetcher is in merkle mode
if sf.owner == (common.Hash{}) {
tr, err := sf.db.OpenTrie(sf.state)
if err != nil {
log.Warn("Trie prefetcher failed opening account trie", "root", sf.root, "err", err)
return err
}
sf.trie = tr
return nil
}
tr, err := sf.db.OpenStorageTrie(sf.state, sf.addr, sf.root, nil)
if err != nil {
log.Warn("Trie prefetcher failed opening storage trie", "root", sf.root, "err", err)
return err
}
sf.trie = tr
return nil
}
// loop loads newly-scheduled trie tasks as they are received and loads them, stopping
// when requested.
func (sf *subfetcher) loop() {
// No matter how the loop stops, signal anyone waiting that it's terminated
defer close(sf.term)
if err := sf.openTrie(); err != nil {
return
// terminate requests the prefetcher to stop by closing p.stop (at most once)
// and then always blocks until p.term is closed.
func (p *prefetcher) terminate() {
select {
case <-p.stop:
// stop is already closed, don't close it a second time
default:
close(p.stop)
}
// Wait for the termination signal
<-p.term
}
// loop processes prefetch tasks until terminated.
func (p *prefetcher) loop() {
defer close(p.term)
for {
select {
case <-sf.wake:
// Execute all remaining tasks in a single run
sf.lock.Lock()
tasks := sf.tasks
sf.tasks = nil
sf.lock.Unlock()
case <-p.wake:
p.lock.Lock()
tasks := p.tasks
p.tasks = nil
p.lock.Unlock()
var (
addresses []common.Address
slots [][]byte
addrs []common.Address
slots = make(map[common.Address][][]byte)
)
for _, task := range tasks {
if task.addr != nil {
key := *task.addr
if task.read {
if _, ok := sf.seenReadAddr[key]; ok {
sf.dupsRead++
if task.kind == kindAccount {
for _, addr := range task.accounts {
if p.dedupAddr(addr, task.read) {
continue
}
if _, ok := sf.seenWriteAddr[key]; ok {
sf.dupsCross++
continue
}
sf.seenReadAddr[key] = struct{}{}
} else {
if _, ok := sf.seenReadAddr[key]; ok {
sf.dupsCross++
continue
}
if _, ok := sf.seenWriteAddr[key]; ok {
sf.dupsWrite++
continue
}
sf.seenWriteAddr[key] = struct{}{}
addrs = append(addrs, addr)
}
addresses = append(addresses, *task.addr)
} else {
key := *task.slot
if task.read {
if _, ok := sf.seenReadSlot[key]; ok {
sf.dupsRead++
for _, slot := range task.slots {
if p.dedupSlot(task.account, slot, task.read) {
continue
}
if _, ok := sf.seenWriteSlot[key]; ok {
sf.dupsCross++
continue
}
sf.seenReadSlot[key] = struct{}{}
} else {
if _, ok := sf.seenReadSlot[key]; ok {
sf.dupsCross++
continue
}
if _, ok := sf.seenWriteSlot[key]; ok {
sf.dupsWrite++
continue
}
sf.seenWriteSlot[key] = struct{}{}
slots[task.account] = append(slots[task.account], slot.Bytes())
}
slots = append(slots, key.Bytes())
}
}
if len(addresses) != 0 {
if err := sf.trie.PrefetchAccount(addresses); err != nil {
if len(addrs) > 0 {
if err := p.trie.PrefetchAccount(addrs); err != nil {
log.Error("Failed to prefetch accounts", "err", err)
}
}
if len(slots) != 0 {
if err := sf.trie.PrefetchStorage(sf.addr, slots); err != nil {
for addr, keys := range slots {
if err := p.trie.PrefetchStorage(addr, keys); err != nil {
log.Error("Failed to prefetch storage", "err", err)
}
}
case <-sf.stop:
// Termination is requested, abort if no more tasks are pending. If
// there are some, exhaust them first.
sf.lock.Lock()
done := sf.tasks == nil
sf.lock.Unlock()
case <-p.stop:
p.lock.Lock()
done := p.tasks == nil
p.lock.Unlock()
if done {
return
}
// Some tasks are pending, loop and pick them up (that wake branch
// will be selected eventually, whilst stop remains closed to this
// branch will also run afterwards).
}
}
}
// dedupAddr reports whether addr has already been recorded by an earlier
// request, regardless of whether that request was a read or a write. If it
// has not, addr is recorded in the set matching the read flag (seenReadAddr
// for reads, seenWriteAddr for writes) and false is returned.
func (p *prefetcher) dedupAddr(addr common.Address, read bool) bool {
	// A hit in either set makes the request a duplicate
	if _, seen := p.seenReadAddr[addr]; seen {
		return true
	}
	if _, seen := p.seenWriteAddr[addr]; seen {
		return true
	}
	// First sighting, remember it under the requesting category
	if read {
		p.seenReadAddr[addr] = struct{}{}
	} else {
		p.seenWriteAddr[addr] = struct{}{}
	}
	return false
}
// dedupSlot reports whether the slot of the given account has already been
// recorded by an earlier request, regardless of whether that request was a
// read or a write. If it has not, the (addr, slot) pair is recorded in the set
// matching the read flag (seenReadSlot for reads, seenWriteSlot for writes)
// and false is returned.
func (p *prefetcher) dedupSlot(addr common.Address, slot common.Hash, read bool) bool {
	id := slotKey{addr: addr, slot: slot}

	// A hit in either set makes the request a duplicate
	if _, seen := p.seenReadSlot[id]; seen {
		return true
	}
	if _, seen := p.seenWriteSlot[id]; seen {
		return true
	}
	// First sighting, remember it under the requesting category
	if read {
		p.seenReadSlot[id] = struct{}{}
	} else {
		p.seenWriteSlot[id] = struct{}{}
	}
	return false
}

View file

@ -21,86 +21,71 @@ import (
"testing"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/tracing"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/internal/testrand"
"github.com/ethereum/go-ethereum/triedb"
"github.com/ethereum/go-ethereum/trie"
"github.com/holiman/uint256"
)
func filledStateDB() *StateDB {
state, _ := New(types.EmptyRootHash, NewDatabaseForTesting())
// Create an account and check if the retrieved balance is correct
addr := common.HexToAddress("0xaffeaffeaffeaffeaffeaffeaffeaffeaffeaffe")
skey := common.HexToHash("aaa")
sval := common.HexToHash("bbb")
state.SetBalance(addr, uint256.NewInt(42), tracing.BalanceChangeUnspecified) // Change the account trie
state.SetCode(addr, []byte("hello"), tracing.CodeChangeUnspecified) // Change an external metadata
state.SetState(addr, skey, sval) // Change the storage trie
state.SetBalance(addr, uint256.NewInt(42), tracing.BalanceChangeUnspecified)
state.SetCode(addr, []byte("hello"), tracing.CodeChangeUnspecified)
state.SetState(addr, skey, sval)
for i := 0; i < 100; i++ {
sk := common.BigToHash(big.NewInt(int64(i)))
state.SetState(addr, sk, sk) // Change the storage trie
state.SetState(addr, sk, sk)
}
return state
}
func TestUseAfterTerminate(t *testing.T) {
func TestSubfetcherUseAfterTerminate(t *testing.T) {
db := filledStateDB()
prefetcher := newTriePrefetcher(db.db, db.originalRoot, "", true)
skey := common.HexToHash("aaa")
if err := prefetcher.prefetch(common.Hash{}, db.originalRoot, common.Address{}, nil, []common.Hash{skey}, false); err != nil {
t.Errorf("Prefetch failed before terminate: %v", err)
}
prefetcher.terminate(false)
if err := prefetcher.prefetch(common.Hash{}, db.originalRoot, common.Address{}, nil, []common.Hash{skey}, false); err == nil {
t.Errorf("Prefetch succeeded after terminate: %v", err)
}
if tr := prefetcher.trie(common.Hash{}, db.originalRoot); tr == nil {
t.Errorf("Prefetcher returned nil trie after terminate")
}
}
func TestVerklePrefetcher(t *testing.T) {
disk := rawdb.NewMemoryDatabase()
db := triedb.NewDatabase(disk, triedb.VerkleDefaults)
sdb := NewDatabase(db, nil)
state, err := New(types.EmptyRootHash, sdb)
// Open a trie and create a subfetcher for it.
id := trie.StateTrieID(db.originalRoot)
tr, err := trie.NewStateTrie(id, db.db.TrieDB())
if err != nil {
t.Fatalf("failed to initialize state: %v", err)
t.Fatalf("Failed to open trie: %v", err)
}
// Create an account and check if the retrieved balance is correct
addr := testrand.Address()
skey := testrand.Hash()
sval := testrand.Hash()
sf := newPrefetcher(tr, false)
addr := common.HexToAddress("0xaffeaffeaffeaffeaffeaffeaffeaffeaffeaffe")
state.SetBalance(addr, uint256.NewInt(42), tracing.BalanceChangeUnspecified) // Change the account trie
state.SetCode(addr, []byte("hello"), tracing.CodeChangeUnspecified) // Change an external metadata
state.SetState(addr, skey, sval) // Change the storage trie
root, _ := state.Commit(0, true, false)
// Scheduling before termination should succeed.
if err := sf.scheduleAccounts([]common.Address{addr}, false); err != nil {
t.Fatalf("Schedule failed before terminate: %v", err)
}
// Terminate synchronously — waits for pending tasks.
sf.terminate()
state, _ = New(root, sdb)
fetcher := newTriePrefetcher(sdb, root, "", false)
// Read account
fetcher.prefetch(common.Hash{}, root, common.Address{}, []common.Address{addr}, nil, false)
// Read storage slot
fetcher.prefetch(crypto.Keccak256Hash(addr.Bytes()), common.Hash{}, addr, nil, []common.Hash{skey}, false)
fetcher.terminate(false)
accountTrie := fetcher.trie(common.Hash{}, root)
storageTrie := fetcher.trie(crypto.Keccak256Hash(addr.Bytes()), common.Hash{})
rootA := accountTrie.Hash()
rootB := storageTrie.Hash()
if rootA != rootB {
t.Fatal("Two different tries are retrieved")
// Scheduling after termination should fail.
if err := sf.scheduleAccounts([]common.Address{addr}, false); err == nil {
t.Fatal("Schedule succeeded after terminate")
}
}
// TestWrapTriePrefetch schedules an account prefetch on a wrapTrie, terminates
// it and checks that the trie still reports a non-zero hash afterwards.
func TestWrapTriePrefetch(t *testing.T) {
db := filledStateDB()
// Create a wrapTrie with prefetching enabled.
// NOTE(review): the two boolean arguments presumably switch on prefetching
// and read prefetching; confirm against newWrapTrie's signature.
id := trie.StateTrieID(db.originalRoot)
tr, err := newWrapTrie(id, db.db.TrieDB(), true, true)
if err != nil {
t.Fatalf("Failed to create wrapTrie: %v", err)
}
addr := common.HexToAddress("0xaffeaffeaffeaffeaffeaffeaffeaffeaffeaffe")
// Schedule some prefetch work.
tr.prefetchAccounts([]common.Address{addr}, false)
// Terminate and verify the trie is usable.
tr.term()
// Only a zero hash is treated as a failure; no specific root is expected.
if tr.Hash() == (common.Hash{}) {
t.Fatal("wrapTrie hash is zero after prefetch")
}
}

View file

@ -232,38 +232,31 @@ func (api *DebugAPI) StorageRangeAt(ctx context.Context, blockNrOrHash rpc.Block
}
func storageRangeAt(statedb *state.StateDB, root common.Hash, address common.Address, start []byte, maxResult int) (StorageRangeResult, error) {
storageRoot := statedb.GetStorageRoot(address)
if storageRoot == types.EmptyRootHash || storageRoot == (common.Hash{}) {
return StorageRangeResult{}, nil // empty storage
it, err := statedb.Database().Iteratee(root)
if err != nil {
return StorageRangeResult{}, err
}
storageIt, err := it.NewStorageIterator(crypto.Keccak256Hash(address.Bytes()), common.BytesToHash(start))
if err != nil {
return StorageRangeResult{}, err
}
// TODO(rjl493456442) it's problematic for traversing the state with in-memory
// state mutations, specifically txIndex != 0.
id := trie.StorageTrieID(root, crypto.Keccak256Hash(address.Bytes()), storageRoot)
tr, err := trie.NewStateTrie(id, statedb.Database().TrieDB())
if err != nil {
return StorageRangeResult{}, err
}
trieIt, err := tr.NodeIterator(start)
if err != nil {
return StorageRangeResult{}, err
}
it := trie.NewIterator(trieIt)
result := StorageRangeResult{Storage: storageMap{}}
for i := 0; i < maxResult && it.Next(); i++ {
_, content, _, err := rlp.Split(it.Value)
for i := 0; i < maxResult && storageIt.Next(); i++ {
_, content, _, err := rlp.Split(storageIt.Slot())
if err != nil {
return StorageRangeResult{}, err
}
e := storageEntry{Value: common.BytesToHash(content)}
if preimage := tr.GetKey(it.Key); preimage != nil {
preimage := common.BytesToHash(preimage)
if preimage, err := storageIt.Key(); err == nil {
e.Key = &preimage
}
result.Storage[common.BytesToHash(it.Key)] = e
result.Storage[storageIt.Hash()] = e
}
// Add the 'next key' so clients can continue downloading.
if it.Next() {
next := common.BytesToHash(it.Key)
if storageIt.Next() {
next := storageIt.Hash()
result.NextKey = &next
}
return result, nil

View file

@ -388,17 +388,15 @@ func (api *BlockChainAPI) GetProof(ctx context.Context, address common.Address,
return nil, err
}
codeHash := statedb.GetCodeHash(address)
storageRoot := statedb.GetStorageRoot(address)
hasher, err := statedb.Database().Hasher(header.Root)
if err != nil {
return nil, err
}
prover, ok := hasher.(state.Prover)
if !ok {
return nil, errors.New("state proving is not supported")
}
if len(keys) > 0 {
var storageTrie state.Trie
if storageRoot != types.EmptyRootHash && storageRoot != (common.Hash{}) {
st, err := statedb.Database().OpenStorageTrie(header.Root, address, storageRoot, nil)
if err != nil {
return nil, err
}
storageTrie = st
}
// Create the proofs for the storageKeys.
for i, key := range keys {
if err := ctx.Err(); err != nil {
@ -414,12 +412,8 @@ func (api *BlockChainAPI) GetProof(ctx context.Context, address common.Address,
} else {
outputKey = hexutil.Encode(key[:])
}
if storageTrie == nil {
storageProof[i] = StorageResult{outputKey, &hexutil.Big{}, []string{}}
continue
}
var proof proofList
if err := storageTrie.Prove(crypto.Keccak256(key.Bytes()), &proof); err != nil {
if err := prover.ProveStorage(address, crypto.Keccak256Hash(key.Bytes()), &proof); err != nil {
return nil, err
}
value := (*hexutil.Big)(statedb.GetState(address, key).Big())
@ -427,12 +421,8 @@ func (api *BlockChainAPI) GetProof(ctx context.Context, address common.Address,
}
}
// Create the accountProof.
tr, err := statedb.Database().OpenTrie(header.Root)
if err != nil {
return nil, err
}
var accountProof proofList
if err := tr.Prove(crypto.Keccak256(address.Bytes()), &accountProof); err != nil {
if err := prover.ProveAccount(address, &accountProof); err != nil {
return nil, err
}
balance := statedb.GetBalance(address).ToBig()
@ -442,7 +432,7 @@ func (api *BlockChainAPI) GetProof(ctx context.Context, address common.Address,
Balance: (*hexutil.Big)(balance),
CodeHash: codeHash,
Nonce: hexutil.Uint64(statedb.GetNonce(address)),
StorageHash: storageRoot,
//StorageHash: storageRoot, // TODO(rjl493456442)
StorageProof: storageProof,
}, statedb.Error()
}

View file

@ -324,7 +324,10 @@ func (miner *Miner) prepareWork(ctx context.Context, genParams *generateParams,
// makeEnv creates a new environment for the sealing block.
func (miner *Miner) makeEnv(parent *types.Header, header *types.Header, coinbase common.Address, witness bool) (*environment, error) {
// Retrieve the parent state to execute on top.
state, err := miner.chain.StateAt(parent.Root)
state, err := miner.chain.StateWithConfig(parent.Root, core.StateConfig{
Prefetch: true,
PrefetchRead: witness,
})
if err != nil {
return nil, err
}
@ -334,8 +337,8 @@ func (miner *Miner) makeEnv(parent *types.Header, header *types.Header, coinbase
if err != nil {
return nil, err
}
state.TraceWitness(bundle)
}
state.StartPrefetcher("miner", bundle)
// Note the passed coinbase may be different with header.Coinbase.
return &environment{
signer: types.MakeSigner(miner.chainConfig, header.Number, header.Time),

View file

@ -17,7 +17,9 @@
package bintrie
import (
"bytes"
"errors"
"fmt"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/trie"
@ -38,15 +40,341 @@ type binaryNodeIterator struct {
stack []binaryNodeIteratorState
}
func newBinaryNodeIterator(t *BinaryTrie, _ []byte) (trie.NodeIterator, error) {
func newBinaryNodeIterator(t *BinaryTrie, start []byte) (trie.NodeIterator, error) {
if t.Hash() == zero {
return &binaryNodeIterator{trie: t, lastErr: errIteratorEnd}, nil
}
it := &binaryNodeIterator{trie: t, current: t.root}
// it.err = it.seek(start)
if len(start) > 0 {
if err := it.seek(start); err != nil {
return nil, err
}
}
return it, nil
}
// seek positions the iterator so that the next call to Next(true) advances to
// the first leaf with key >= start. The walk itself is carried out by
// seekDescend, which follows start's bit path from the root and builds the
// iterator stack along the way; when the chosen path dead-ends (Empty, missing
// child, or a stem strictly less than start), it backtracks through the stack
// to find the next in-order subtree and descends to its leftmost leaf.
//
// A nil/empty start is a no-op; iteration begins at the trie root as usual.
// Otherwise start is copied into a 32-byte key: shorter inputs are zero-padded
// on the right, anything beyond 32 bytes is ignored. If the trie has no root,
// an Empty root, or a root that resolves to nil, the iterator is marked as
// exhausted and nil is returned. A root that had to be resolved is written
// back into the trie.
//
// This is required for resumable bintrie generators (snapshot generation,
// pathdb flat-state population) so that an interrupted run can pick up where
// it left off after a crash or graceful shutdown.
func (it *binaryNodeIterator) seek(start []byte) error {
	if len(start) == 0 {
		return nil
	}
	// Expand start to the trie's natural 32-byte key length.
	var target [32]byte
	copy(target[:], start)

	// Drop whatever position the iterator held before.
	it.stack, it.current, it.lastErr = it.stack[:0], nil, nil

	// A missing or Empty root leaves nothing to iterate.
	root := it.trie.root
	switch root.(type) {
	case nil, Empty:
		it.lastErr = errIteratorEnd
		return nil
	}
	// Resolve the root if it's a HashedNode
	node, err := it.resolveIfHashed(root, nil, 0)
	if err != nil {
		return err
	}
	if node == nil {
		it.lastErr = errIteratorEnd
		return nil
	}
	// Keep the resolved root around so it isn't resolved again.
	if node != root {
		it.trie.root = node
	}
	return it.seekDescend(node, target[:])
}
// seekDescend follows key's bit path downward starting at node. Every
// InternalNode on the way is pushed onto the stack with Index equal to the
// branch taken (0 = left, 1 = right). Reaching a StemNode that is >= the
// target pushes it with the value offset to start scanning from. Any dead end
// (nil/Empty child, unresolved child, or a stem smaller than the target) is
// handed over to seekBacktrack.
func (it *binaryNodeIterator) seekDescend(node BinaryNode, key []byte) error {
	for {
		switch cur := node.(type) {
		case *InternalNode:
			level := cur.depth
			if level >= 31*8 {
				return errors.New("seek: internal node too deep")
			}
			// Select the branch dictated by the key bit at this depth.
			goRight := key[level/8]&(0x80>>uint(level%8)) != 0
			side, next := 0, cur.left
			if goRight {
				side, next = 1, cur.right
			}
			// Record the branch being explored, as the seek helpers expect.
			it.stack = append(it.stack, binaryNodeIteratorState{Node: cur, Index: side})
			it.current = cur

			if next == nil {
				return it.seekBacktrack()
			}
			if _, gone := next.(Empty); gone {
				return it.seekBacktrack()
			}
			// The key itself describes the path of the child being loaded.
			loaded, err := it.resolveIfHashed(next, key, level+1)
			if err != nil {
				return err
			}
			if loaded == nil {
				return it.seekBacktrack()
			}
			if loaded != next {
				// Keep the resolved child attached to its parent.
				if goRight {
					cur.right = loaded
				} else {
					cur.left = loaded
				}
			}
			node = loaded

		case *StemNode:
			order := bytes.Compare(cur.Stem, key[:StemSize])
			if order < 0 {
				// This stem sorts before the target: skip it entirely and
				// look for the next subtree to the right.
				return it.seekBacktrack()
			}
			// Only an exact stem match honours the requested value offset;
			// a larger stem is scanned from its first value.
			offset := 0
			if order == 0 {
				offset = int(key[StemSize])
			}
			it.stack = append(it.stack, binaryNodeIteratorState{Node: cur, Index: offset})
			it.current = cur
			return nil

		default:
			return fmt.Errorf("seek: unexpected node type %T", node)
		}
	}
}
// seekBacktrack unwinds the stack until it finds an InternalNode that was
// entered on its left side and still has a usable right child. That frame is
// switched to Index 1 and the iterator is placed on the leftmost leaf of the
// right subtree. When the stack runs out, the iterator is marked as finished
// with errIteratorEnd.
func (it *binaryNodeIterator) seekBacktrack() error {
	for len(it.stack) > 0 {
		last := len(it.stack) - 1
		frame := &it.stack[last]

		parent, isInternal := frame.Node.(*InternalNode)
		if !isInternal || frame.Index != 0 {
			// Either a non-internal frame (defensive) or a node whose right
			// side was already the one being explored: nothing left here.
			it.stack = it.stack[:last]
			continue
		}
		// The left side is done; move this frame over to the right side.
		frame.Index = 1

		sibling := parent.right
		usable := sibling != nil
		if usable {
			_, isEmpty := sibling.(Empty)
			usable = !isEmpty
		}
		if !usable {
			it.stack = it.stack[:last]
			continue
		}
		// Load the right child if it is still only known by hash.
		loaded, err := it.resolveRightChild(parent)
		if err != nil {
			return err
		}
		if loaded == nil {
			it.stack = it.stack[:last]
			continue
		}
		if loaded != sibling {
			parent.right = loaded
			sibling = loaded
		}
		it.current = sibling
		return it.seekLeftmost(sibling)
	}
	it.lastErr = errIteratorEnd
	return nil
}
// seekLeftmost descends into the leftmost leaf of the subtree rooted at
// `node`. Each InternalNode on the way is pushed onto the stack with Index
// set to the side that was entered (0 for left, or 1 when the left side is
// nil/Empty/unresolvable and the right side is used instead). The descent
// ends on a StemNode pushed with Index = 0, ready to scan values from
// offset 0. If an InternalNode offers no usable child, it is popped and
// seekBacktrack takes over.
//
// Hashed children are resolved with the real bit path of the child. That
// path is reconstructed from the Index values of the InternalNodes already
// on the stack (the branches taken so far) and extended as the descent
// proceeds, so that path-addressed node resolvers receive the correct path.
func (it *binaryNodeIterator) seekLeftmost(node BinaryNode) error {
	// absent reports whether a child slot holds nothing that can be entered.
	absent := func(c BinaryNode) bool {
		if c == nil {
			return true
		}
		_, empty := c.(Empty)
		return empty
	}
	// Rebuild the key prefix leading to `node` from the branches recorded on
	// the stack; a set bit means "went right" at that depth.
	var key [32]byte
	for _, state := range it.stack {
		if in, ok := state.Node.(*InternalNode); ok && state.Index == 1 {
			key[in.depth/8] |= 1 << (7 - uint(in.depth%8))
		}
	}
	for {
		switch n := node.(type) {
		case *InternalNode:
			// Same bound as seekDescend; also keeps the key indexing below
			// within the 32-byte buffer.
			if n.depth >= 31*8 {
				return errors.New("seekLeftmost: internal node too deep")
			}
			it.stack = append(it.stack, binaryNodeIteratorState{Node: n, Index: 0})
			it.current = n

			top := len(it.stack) - 1
			mask := byte(1) << (7 - uint(n.depth%8))
			key[n.depth/8] &^= mask // left is the default branch

			// Prefer the left child, fall back to the right one.
			child, pickedRight := n.left, false
			if absent(child) {
				child, pickedRight = n.right, true
			}
			if absent(child) {
				// Both children are empty/nil — degenerate. Pop and let
				// seekBacktrack handle it. (Not expected for a well-formed
				// trie.)
				it.stack = it.stack[:top]
				return it.seekBacktrack()
			}
			if pickedRight {
				it.stack[top].Index = 1
				key[n.depth/8] |= mask
			}
			// Resolve a hashed child using its actual path.
			resolved, err := it.resolveIfHashed(child, key[:], n.depth+1)
			if err != nil {
				return err
			}
			if resolved == nil && !pickedRight && !absent(n.right) {
				// The left child yielded no data; treat it as empty and try
				// the right sibling instead.
				child, pickedRight = n.right, true
				it.stack[top].Index = 1
				key[n.depth/8] |= mask
				resolved, err = it.resolveIfHashed(child, key[:], n.depth+1)
				if err != nil {
					return err
				}
			}
			if resolved == nil {
				// Nothing usable on either side of this node.
				it.stack = it.stack[:top]
				return it.seekBacktrack()
			}
			if resolved != child {
				// Cache the resolved node on its parent.
				if pickedRight {
					n.right = resolved
				} else {
					n.left = resolved
				}
			}
			node = resolved
		case *StemNode:
			it.stack = append(it.stack, binaryNodeIteratorState{Node: n, Index: 0})
			it.current = n
			return nil
		default:
			return fmt.Errorf("seekLeftmost: unexpected node type %T", node)
		}
	}
}
// resolveIfHashed loads the node behind a HashedNode through the trie's
// nodeResolver and deserializes it. Any other node kind is returned as-is.
// A resolver that yields no data results in (nil, nil).
//
// keyForPath provides the bits from which the node's path is derived; it is
// ignored when nil or when depth is 0 (the root has an empty path). depth is
// the depth of the node being resolved and is forwarded to the deserializer.
func (it *binaryNodeIterator) resolveIfHashed(node BinaryNode, keyForPath []byte, depth int) (BinaryNode, error) {
	hashed, isHashed := node.(HashedNode)
	if !isHashed {
		return node, nil
	}
	var (
		path []byte
		err  error
	)
	if keyForPath != nil && depth > 0 {
		// The node at `depth` is addressed by the first `depth` key bits.
		if path, err = keyToPath(depth-1, keyForPath); err != nil {
			return nil, err
		}
	}
	hash := common.Hash(hashed)
	blob, err := it.trie.nodeResolver(path, hash)
	if err != nil {
		return nil, err
	}
	if blob == nil {
		return nil, nil
	}
	loaded, err := DeserializeNodeWithHash(blob, depth, hash)
	if err != nil {
		return nil, err
	}
	return loaded, nil
}
// resolveRightChild resolves the right child of an InternalNode. It is used
// by seekBacktrack when flipping from left to right exploration. A right
// child that is not a HashedNode is returned unchanged.
//
// The path handed to the resolver must describe the full route from the root
// to the child, not just the final bit. The route is reconstructed from the
// iterator stack: every InternalNode above parent contributes the branch
// recorded in its Index, and the bit at parent.depth is forced to 1 because
// the right child is being entered.
func (it *binaryNodeIterator) resolveRightChild(parent *InternalNode) (BinaryNode, error) {
	right := parent.right
	if _, ok := right.(HashedNode); !ok {
		return right, nil
	}
	var key [32]byte
	for _, state := range it.stack {
		ancestor, ok := state.Node.(*InternalNode)
		if !ok || ancestor.depth >= parent.depth {
			// Only strict ancestors of parent contribute prefix bits.
			continue
		}
		if state.Index == 1 {
			key[ancestor.depth/8] |= 1 << (7 - uint(ancestor.depth%8))
		}
	}
	// Descend right at parent's own depth.
	key[parent.depth/8] |= 1 << (7 - uint(parent.depth%8))
	return it.resolveIfHashed(right, key[:], parent.depth+1)
}
// Next moves the iterator to the next node. If the parameter is false, any child
// nodes will be skipped.
func (it *binaryNodeIterator) Next(descend bool) bool {

View file

@ -18,6 +18,7 @@ package bintrie
import (
"bytes"
"slices"
"testing"
"github.com/ethereum/go-ethereum/common"
@ -206,6 +207,241 @@ func TestIteratorDeepTree(t *testing.T) {
}
}
// collectLeaves walks tr from start and gathers a copy of every leaf's
// (key, value) pair in iteration order. Any iterator construction or
// iteration error fails the test.
func collectLeaves(t *testing.T, tr *BinaryTrie, start []byte) [][2][]byte {
	t.Helper()
	iter, err := newBinaryNodeIterator(tr, start)
	if err != nil {
		t.Fatal(err)
	}
	var leaves [][2][]byte
	for iter.Next(true) {
		if !iter.Leaf() {
			continue
		}
		// Clone both slices so later iterator steps cannot alias them.
		leaves = append(leaves, [2][]byte{
			slices.Clone(iter.LeafKey()),
			slices.Clone(iter.LeafBlob()),
		})
	}
	if iter.Error() != nil {
		t.Fatalf("iterator error: %v", iter.Error())
	}
	return leaves
}
// TestSeekEmptyStart verifies that seek with a nil/empty start behaves like
// a fresh iterator (no skipping).
func TestSeekEmptyStart(t *testing.T) {
tr := makeTrie(t, [][2]common.Hash{
{common.HexToHash("0000000000000000000000000000000000000000000000000000000000000001"), oneKey},
{common.HexToHash("8000000000000000000000000000000000000000000000000000000000000001"), oneKey},
})
// Both nil and empty slice should iterate everything.
if got := len(collectLeaves(t, tr, nil)); got != 2 {
t.Fatalf("nil start: expected 2 leaves, got %d", got)
}
if got := len(collectLeaves(t, tr, []byte{})); got != 2 {
t.Fatalf("empty start: expected 2 leaves, got %d", got)
}
}
// TestSeekToExactKey checks that seeking to a key that is present in the
// trie makes that very leaf the first one returned.
func TestSeekToExactKey(t *testing.T) {
	keys := [][2]common.Hash{
		{common.HexToHash("0000000000000000000000000000000000000000000000000000000000000001"), oneKey},
		{common.HexToHash("0000000000000000000000000000000000000000000000000000000000000002"), twoKey},
		{common.HexToHash("8000000000000000000000000000000000000000000000000000000000000001"), oneKey},
	}
	tr := makeTrie(t, keys)

	// Start at the middle key; the middle and last keys must come back.
	start := keys[1][0]
	leaves := collectLeaves(t, tr, start[:])
	if len(leaves) != 2 {
		t.Fatalf("expected 2 leaves after seek to %x, got %d", start, len(leaves))
	}
	if first := leaves[0][0]; !bytes.Equal(first, keys[1][0][:]) {
		t.Fatalf("first leaf after seek: got %x, want %x", first, keys[1][0])
	}
	if second := leaves[1][0]; !bytes.Equal(second, keys[2][0][:]) {
		t.Fatalf("second leaf after seek: got %x, want %x", second, keys[2][0])
	}
}
// TestSeekToBetweenKeys checks that seeking to a key that is absent from the
// trie lands on the next larger key in iteration order.
func TestSeekToBetweenKeys(t *testing.T) {
	keys := [][2]common.Hash{
		{common.HexToHash("0000000000000000000000000000000000000000000000000000000000000001"), oneKey},
		{common.HexToHash("0000000000000000000000000000000000000000000000000000000000000005"), twoKey},
		{common.HexToHash("8000000000000000000000000000000000000000000000000000000000000001"), oneKey},
	}
	tr := makeTrie(t, keys)

	// ...03 sits between keys[0] (...01) and keys[1] (...05), so iteration
	// must begin at keys[1].
	gap := common.HexToHash("0000000000000000000000000000000000000000000000000000000000000003")
	leaves := collectLeaves(t, tr, gap[:])
	if len(leaves) != 2 {
		t.Fatalf("expected 2 leaves after seek between, got %d", len(leaves))
	}
	if first := leaves[0][0]; !bytes.Equal(first, keys[1][0][:]) {
		t.Fatalf("first leaf: got %x, want %x", first, keys[1][0])
	}
	if second := leaves[1][0]; !bytes.Equal(second, keys[2][0][:]) {
		t.Fatalf("second leaf: got %x, want %x", second, keys[2][0])
	}
}
// TestSeekIntoEmptySubtree checks that a seek whose bit path leads to a
// region without any leaf at or after the target falls through to the next
// populated subtree.
func TestSeekIntoEmptySubtree(t *testing.T) {
	// One key whose first bit is 0 and one whose first bit is 1.
	keys := [][2]common.Hash{
		{common.HexToHash("0000000000000000000000000000000000000000000000000000000000000001"), oneKey},
		{common.HexToHash("8000000000000000000000000000000000000000000000000000000000000001"), twoKey},
	}
	tr := makeTrie(t, keys)

	// The target 0x40... also has its first bit cleared, so it shares the
	// first bit of keys[0]. keys[0] sorts before the target, however, so
	// nothing sharing that first bit qualifies and the only leaf at or after
	// the target is keys[1].
	target := common.HexToHash("4000000000000000000000000000000000000000000000000000000000000001")
	leaves := collectLeaves(t, tr, target[:])
	if len(leaves) != 1 {
		t.Fatalf("expected 1 leaf after seek into missing subtree, got %d", len(leaves))
	}
	if only := leaves[0][0]; !bytes.Equal(only, keys[1][0][:]) {
		t.Fatalf("leaf: got %x, want %x", only, keys[1][0])
	}
}
// TestSeekPastEnd verifies that seeking past the last key returns no leaves.
func TestSeekPastEnd(t *testing.T) {
keys := [][2]common.Hash{
{common.HexToHash("0000000000000000000000000000000000000000000000000000000000000001"), oneKey},
{common.HexToHash("0000000000000000000000000000000000000000000000000000000000000002"), oneKey},
}
tr := makeTrie(t, keys)
// Seek past the maximum key.
beyond := common.HexToHash("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff")
got := collectLeaves(t, tr, beyond[:])
if len(got) != 0 {
t.Fatalf("expected 0 leaves after seek past end, got %d: %x", len(got), got)
}
}
// TestSeekWithinSameStem checks offset handling inside one stem: when several
// values live under the same stem, a seek must start at the requested value
// offset (or the next populated one).
func TestSeekWithinSameStem(t *testing.T) {
	// Identical first 31 bytes; the value offsets are 0x01, 0x05 and 0xff.
	keys := [][2]common.Hash{
		{common.HexToHash("0000000000000000000000000000000000000000000000000000000000000001"), oneKey},
		{common.HexToHash("0000000000000000000000000000000000000000000000000000000000000005"), twoKey},
		{common.HexToHash("00000000000000000000000000000000000000000000000000000000000000ff"), oneKey},
	}
	tr := makeTrie(t, keys)

	// Starting exactly at offset 0x05 returns offsets 0x05 and 0xff.
	start := common.HexToHash("0000000000000000000000000000000000000000000000000000000000000005")
	leaves := collectLeaves(t, tr, start[:])
	if len(leaves) != 2 {
		t.Fatalf("expected 2 leaves, got %d", len(leaves))
	}
	if off := leaves[0][0][31]; off != 0x05 {
		t.Fatalf("first leaf offset: got 0x%02x, want 0x05", off)
	}
	if off := leaves[1][0][31]; off != 0xff {
		t.Fatalf("second leaf offset: got 0x%02x, want 0xff", off)
	}

	// Offset 0x06 is unpopulated; the only value at or after it is 0xff.
	start[31] = 0x06
	leaves = collectLeaves(t, tr, start[:])
	if len(leaves) != 1 {
		t.Fatalf("expected 1 leaf after seek to offset 6, got %d", len(leaves))
	}
	if off := leaves[0][0][31]; off != 0xff {
		t.Fatalf("leaf offset: got 0x%02x, want 0xff", off)
	}
}
// TestSeekResumeSimulation mimics an interrupted generator: it records a full
// iteration, pretends processing stopped after one of the leaves, then opens
// a new iterator just past that leaf and checks that exactly the remaining
// leaves are produced, in the same order.
func TestSeekResumeSimulation(t *testing.T) {
	// Sixteen keys, one per high nibble of the first byte, each with value
	// offset 0x01 in the last byte.
	var keys [][2]common.Hash
	for i := range 16 {
		var k common.Hash
		k[0], k[31] = byte(i<<4), 0x01
		keys = append(keys, [2]common.Hash{k, oneKey})
	}
	tr := makeTrie(t, keys)

	// Reference run over the whole trie.
	all := collectLeaves(t, tr, nil)
	if len(all) != 16 {
		t.Fatalf("first pass: expected 16 leaves, got %d", len(all))
	}

	// Pretend the run was interrupted right after this leaf.
	stopIdx := 7
	lastKey := all[stopIdx][0]

	// Resume strictly after lastKey: bump the last byte from 0x01 to 0x02.
	// No key in this test uses offset 0x02 or higher, so the seek has to
	// move on to the following key.
	resumeKey := slices.Clone(lastKey)
	resumeKey[31]++

	rest := collectLeaves(t, tr, resumeKey)
	remaining := len(all) - stopIdx - 1
	if len(rest) != remaining {
		t.Fatalf("resume: expected %d leaves, got %d", remaining, len(rest))
	}
	for i, leaf := range rest {
		want := all[stopIdx+1+i]
		if !bytes.Equal(leaf[0], want[0]) {
			t.Fatalf("resume leaf %d: got %x, want %x", i, leaf[0], want[0])
		}
	}
}
// TestSeekDeepTree checks seeking in a trie whose two keys share a long
// common prefix, which forces a deep chain of internal nodes.
func TestSeekDeepTree(t *testing.T) {
	keys := [][2]common.Hash{
		{common.HexToHash("0000000000C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0"), oneKey},
		{common.HexToHash("0000000000E00000000000000000000000000000000000000000000000000000"), twoKey},
	}
	tr := makeTrie(t, keys)

	// Starting at the smaller key returns both leaves, smaller one first.
	leaves := collectLeaves(t, tr, keys[0][0][:])
	if len(leaves) != 2 {
		t.Fatalf("seek to first: expected 2 leaves, got %d", len(leaves))
	}
	if head := leaves[0][0]; !bytes.Equal(head, keys[0][0][:]) {
		t.Fatalf("first leaf: got %x, want %x", head, keys[0][0])
	}

	// Starting at the larger key returns only that leaf.
	leaves = collectLeaves(t, tr, keys[1][0][:])
	if len(leaves) != 1 {
		t.Fatalf("seek to second: expected 1 leaf, got %d", len(leaves))
	}
	if head := leaves[0][0]; !bytes.Equal(head, keys[1][0][:]) {
		t.Fatalf("leaf: got %x, want %x", head, keys[1][0])
	}
}
// TestIteratorNodeCount verifies the total number of Next(true) calls
// for a known tree structure.
func TestIteratorNodeCount(t *testing.T) {

78
trie/bintrie/pack.go Normal file
View file

@ -0,0 +1,78 @@
// Copyright 2026 go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package bintrie
import (
"encoding/binary"
"github.com/holiman/uint256"
)
// PackBasicData encodes an account's basic metadata (code size, nonce,
// balance) into the 32-byte BasicData leaf value defined by EIP-7864.
//
// The canonical spec layout is:
//
//	byte 0       version (currently always 0, left as the implicit zero)
//	bytes 1..4   reserved
//	bytes 5..7   code_size (big-endian, 3 bytes, max 2^24-1)
//	bytes 8..15  nonce (big-endian, 8 bytes)
//	bytes 16..31 balance (big-endian, right-justified, 16 bytes)
//
// For historical reasons the existing BinaryTrie implementation writes
// code_size as a 4-byte big-endian uint32 starting at
// BasicDataCodeSizeOffset-1 rather than as a 3-byte big-endian field. The
// extra leading byte is reserved per the EIP, so for any code size below
// 2^24 (≈ 16 MB; the EIP-170 contract limit of 24 KB is far below that) it
// is always 0 and the two encodings are bit-equivalent. This function
// preserves that existing behavior byte-for-byte so callers can substitute
// it for the inlined encoding in BinaryTrie.UpdateAccount without changing
// any state root.
//
// Any future correction of the byte offset is a consensus-level change
// and must be coordinated across clients.
func PackBasicData(nonce uint64, balance *uint256.Int, codeSize int) [HashSize]byte {
var data [HashSize]byte
// codeSize is narrowed to uint32 before being written big-endian.
binary.BigEndian.PutUint32(data[BasicDataCodeSizeOffset-1:], uint32(codeSize))
binary.BigEndian.PutUint64(data[BasicDataNonceOffset:], nonce)
// Balance is stored right-justified at the end of BasicData. When the
// big-endian encoding is longer than 16 bytes (e.g. dev-mode accounts
// funded with 0xff × HashSize), its first 16 bytes — the most significant
// ones — are dropped and only the remainder is kept, matching the existing
// BinaryTrie behavior rather than panicking.
// NOTE(review): if Bytes() returns a minimal-length slice, a 17..31-byte
// balance keeps only its last len-16 bytes rather than its low 128 bits —
// confirm against uint256.Int.Bytes. Changing this would alter state roots.
balanceBytes := balance.Bytes()
if len(balanceBytes) > 16 {
balanceBytes = balanceBytes[16:]
}
copy(data[HashSize-len(balanceBytes):], balanceBytes[:])
return data
}
// UnpackBasicData decodes a BasicData leaf value produced by PackBasicData
// into its nonce, balance and code size fields.
//
// The balance is read from at most 16 bytes, so it never exceeds 128 bits;
// a balance that was truncated while packing cannot be recovered here.
func UnpackBasicData(data [HashSize]byte) (nonce uint64, balance *uint256.Int, codeSize int) {
	// Copy the balance window into a fixed 16-byte buffer before decoding.
	var raw [16]byte
	copy(raw[:], data[BasicDataBalanceOffset:])

	return binary.BigEndian.Uint64(data[BasicDataNonceOffset:]),
		new(uint256.Int).SetBytes(raw[:]),
		int(binary.BigEndian.Uint32(data[BasicDataCodeSizeOffset-1:]))
}

View file

@ -242,29 +242,21 @@ func (t *BinaryTrie) GetStorage(addr common.Address, key []byte) ([]byte, error)
}
// UpdateAccount updates the account information for the given address.
//
// The BasicData encoding (nonce, balance, code size packed into 32 bytes)
// is delegated to PackBasicData so that callers outside the trie layer —
// notably the flat-state codec that writes stem blobs to pathdb — can
// produce a bit-identical value without duplicating the layout logic.
func (t *BinaryTrie) UpdateAccount(addr common.Address, acc *types.StateAccount, codeLen int) error {
var (
err error
basicData [HashSize]byte
values = make([][]byte, StemNodeWidth)
stem = GetBinaryTreeKey(addr, zero[:])
values = make([][]byte, StemNodeWidth)
stem = GetBinaryTreeKey(addr, zero[:])
)
binary.BigEndian.PutUint32(basicData[BasicDataCodeSizeOffset-1:], uint32(codeLen))
binary.BigEndian.PutUint64(basicData[BasicDataNonceOffset:], acc.Nonce)
// Because the balance is a max of 16 bytes, truncate
// the extra values. This happens in devmode, where
// 0xff**HashSize is allocated to the developer account.
balanceBytes := acc.Balance.Bytes()
// TODO: reduce the size of the allocation in devmode, then panic instead
// of truncating.
if len(balanceBytes) > 16 {
balanceBytes = balanceBytes[16:]
}
copy(basicData[HashSize-len(balanceBytes):], balanceBytes[:])
basicData := PackBasicData(acc.Nonce, acc.Balance, codeLen)
values[BasicDataLeafKey] = basicData[:]
values[CodeHashLeafKey] = acc.CodeHash[:]
var err error
t.root, err = t.root.InsertValuesAtStem(stem, values, t.nodeResolver, 0)
return err
}
@ -352,9 +344,10 @@ func (t *BinaryTrie) Commit(_ bool) (common.Hash, *trienode.NodeSet) {
}
// NodeIterator returns an iterator that returns nodes of the trie. Iteration
// starts at the key after the given start key.
// starts at the first leaf with key >= startKey. A nil/empty startKey iterates
// the whole trie.
func (t *BinaryTrie) NodeIterator(startKey []byte) (trie.NodeIterator, error) {
return newBinaryNodeIterator(t, nil)
return newBinaryNodeIterator(t, startKey)
}
// Prove constructs a Merkle proof for key. The result contains all encoded nodes

View file

@ -259,6 +259,16 @@ func (set *MergedNodeSet) Merge(other *NodeSet) error {
return nil
}
// MergeSet folds every node set contained in other into the receiver by
// calling Merge on each of them. It stops at, and returns, the first error
// reported by Merge.
func (set *MergedNodeSet) MergeSet(other *MergedNodeSet) error {
	for owner := range other.Sets {
		err := set.Merge(other.Sets[owner])
		if err != nil {
			return err
		}
	}
	return nil
}
// Nodes returns a two-dimensional map for internal nodes.
func (set *MergedNodeSet) Nodes() map[common.Hash]map[string]*Node {
nodes := make(map[common.Hash]map[string]*Node, len(set.Sets))

View file

@ -132,7 +132,10 @@ func (b *buffer) size() uint64 {
// flush persists the in-memory dirty trie node into the disk if the configured
// memory threshold is reached. Note, all data must be written atomically.
func (b *buffer) flush(root common.Hash, db ethdb.KeyValueStore, freezers []ethdb.AncientWriter, progress []byte, nodesCache, statesCache *fastcache.Cache, id uint64, postFlush func()) {
//
// codec is the flat-state codec used for state persistence and cache key
// derivation. It is supplied by the disk layer's owning Database.
func (b *buffer) flush(root common.Hash, db ethdb.KeyValueStore, codec flatStateCodec, freezers []ethdb.AncientWriter, progress []byte, nodesCache, statesCache *fastcache.Cache, id uint64, postFlush func()) {
if b.done != nil {
panic("duplicated flush operation")
}
@ -158,7 +161,7 @@ func (b *buffer) flush(root common.Hash, db ethdb.KeyValueStore, freezers []ethd
// Terminate the state snapshot generation if it's active
var (
start = time.Now()
batch = db.NewBatchWithSize((b.nodes.dbsize() + b.states.dbsize()) * 11 / 10) // extra 10% for potential pebble internal stuff
batch = db.NewBatchWithSize((b.nodes.dbsize() + b.states.dbsize(codec)) * 11 / 10) // extra 10% for potential pebble internal stuff
)
// Explicitly sync the state freezer to ensure all written data is persisted to disk
// before updating the key-value store.
@ -170,7 +173,11 @@ func (b *buffer) flush(root common.Hash, db ethdb.KeyValueStore, freezers []ethd
return
}
nodes := b.nodes.write(batch, nodesCache)
accounts, slots := b.states.write(batch, progress, statesCache)
accounts, slots, flushErr := b.states.write(batch, codec, progress, statesCache)
if flushErr != nil {
b.flushErr = flushErr
return
}
rawdb.WritePersistentStateID(batch, id)
rawdb.WriteSnapshotRoot(batch, root)

View file

@ -95,19 +95,21 @@ type generatorContext struct {
account *holdableIterator // Iterator of account snapshot data
storage *holdableIterator // Iterator of storage snapshot data
db ethdb.KeyValueStore // Key-value store containing the snapshot data
codec flatStateCodec // Flat-state codec for prefix/key-length selection
batch ethdb.Batch // Database batch for writing data atomically
logged time.Time // The timestamp when last generation progress was displayed
}
// newGeneratorContext initializes the context for generation.
func newGeneratorContext(root common.Hash, marker []byte, db ethdb.KeyValueStore) *generatorContext {
func newGeneratorContext(root common.Hash, marker []byte, db ethdb.KeyValueStore, codec flatStateCodec) *generatorContext {
ctx := &generatorContext{
root: root,
db: db,
codec: codec,
batch: db.NewBatch(),
logged: time.Now(),
}
accMarker, storageMarker := splitMarker(marker)
accMarker, storageMarker := codec.SplitMarker(marker)
ctx.openIterator(snapAccount, accMarker)
ctx.openIterator(snapStorage, storageMarker)
return ctx
@ -118,12 +120,12 @@ func newGeneratorContext(root common.Hash, marker []byte, db ethdb.KeyValueStore
// to time to avoid blocking leveldb compaction for a long time.
func (ctx *generatorContext) openIterator(kind string, start []byte) {
if kind == snapAccount {
iter := ctx.db.NewIterator(rawdb.SnapshotAccountPrefix, start)
ctx.account = newHoldableIterator(rawdb.NewKeyLengthIterator(iter, 1+common.HashLength))
iter := ctx.db.NewIterator(ctx.codec.AccountPrefix(), start)
ctx.account = newHoldableIterator(rawdb.NewKeyLengthIterator(iter, ctx.codec.AccountKeyLength()))
return
}
iter := ctx.db.NewIterator(rawdb.SnapshotStoragePrefix, start)
ctx.storage = newHoldableIterator(rawdb.NewKeyLengthIterator(iter, 1+2*common.HashLength))
iter := ctx.db.NewIterator(ctx.codec.StoragePrefix(), start)
ctx.storage = newHoldableIterator(rawdb.NewKeyLengthIterator(iter, ctx.codec.StorageKeyLength()))
}
// reopenIterator releases the specified snapshot iterator and re-open it

View file

@ -125,10 +125,11 @@ type Database struct {
// readOnly is the flag whether the mutation is allowed to be applied.
// It will be set automatically when the database is journaled during
// the shutdown to reject all following unexpected mutations.
readOnly bool // Flag if database is opened in read only mode
waitSync bool // Flag if database is deactivated due to initial state sync
isVerkle bool // Flag if database is used for verkle tree
hasher nodeHasher // Trie node hasher
readOnly bool // Flag if database is opened in read only mode
waitSync bool // Flag if database is deactivated due to initial state sync
isVerkle bool // Flag if database is used for verkle tree
hasher nodeHasher // Trie node hasher
flatCodec flatStateCodec // Flat-state key derivation, persistence and iteration
config *Config // Configuration for database
diskdb ethdb.Database // Persistent storage for matured trie nodes
@ -153,11 +154,12 @@ func New(diskdb ethdb.Database, config *Config, isVerkle bool) *Database {
config = config.sanitize()
db := &Database{
readOnly: config.ReadOnly,
isVerkle: isVerkle,
config: config,
diskdb: diskdb,
hasher: merkleNodeHasher,
readOnly: config.ReadOnly,
isVerkle: isVerkle,
config: config,
diskdb: diskdb,
hasher: merkleNodeHasher,
flatCodec: &merkleFlatCodec{},
}
// Establish a dedicated database namespace tailored for verkle-specific
// data, ensuring the isolation of both verkle and merkle tree data. It's
@ -167,6 +169,12 @@ func New(diskdb ethdb.Database, config *Config, isVerkle bool) *Database {
if isVerkle {
db.diskdb = rawdb.NewTable(diskdb, string(rawdb.VerklePrefix))
db.hasher = binaryNodeHasher
// Wire the bintrie flat-state codec so the disklayer/buffer/generator
// all use the per-stem on-disk layout. The codec needs a reader for
// the read-modify-write performed by applyWrites; the namespaced
// db.diskdb is the right backing store because all bintrie keys
// (trie nodes AND stem blobs) live under the verkle prefix.
db.flatCodec = newBintrieFlatCodec(db.diskdb)
}
// Construct the layer tree by resolving the in-disk singleton state
// and in-memory layer journal.
@ -232,7 +240,7 @@ func (db *Database) setHistoryIndexer() {
func (db *Database) setStateGenerator() error {
// Load the state snapshot generation progress marker to prevent access
// to uncovered states.
generator, root, err := loadGenerator(db.diskdb, db.hasher)
generator, root, err := loadGenerator(db.diskdb, db.hasher, db.isVerkle)
if err != nil {
return err
}
@ -264,13 +272,18 @@ func (db *Database) setStateGenerator() error {
// Disable the background snapshot building in these circumstances:
// - the database is opened in read only mode
// - the snapshot build is explicitly disabled
// - the database is opened in verkle tree mode
noBuild := db.readOnly || db.config.SnapshotNoBuild || db.isVerkle
//
// Note: bintrie/verkle mode is no longer excluded here. The bintrie
// codec ships its own snapshot generator (see generate_bintrie.go) so
// the unified flat-state path can populate stem blobs from an existing
// trie. Generator dispatch in newGenerator/generator.run picks the
// right routine based on the active flatStateCodec.
noBuild := db.readOnly || db.config.SnapshotNoBuild
// Construct the generator and link it to the disk layer, ensuring that the
// generation progress is resolved to prevent accessing uncovered states
// regardless of whether background state snapshot generation is allowed.
dl.setGenerator(newGenerator(db.diskdb, noBuild, generator.Marker, stats))
dl.setGenerator(newGenerator(db.diskdb, db.flatCodec, noBuild, generator.Marker, stats))
// Short circuit if the background generation is not permitted
if noBuild || db.waitSync {
@ -408,7 +421,9 @@ func (db *Database) Enable(root common.Hash) error {
// Re-construct a new disk layer backed by persistent state
// and schedule the state snapshot generation if it's permitted.
db.tree.init(generateSnapshot(db, root, db.isVerkle || db.config.SnapshotNoBuild))
// Bintrie/verkle is no longer treated as "noBuild": the bintrie
// generator (see generate_bintrie.go) handles regeneration from the unified trie.
db.tree.init(generateSnapshot(db, root, db.config.SnapshotNoBuild))
// After snap sync, the state of the database may have changed completely.
// To ensure the history indexer always matches the current state, we must:

View file

@ -17,7 +17,7 @@
package pathdb
import (
"bytes"
"errors"
"fmt"
"sync"
"time"
@ -25,7 +25,6 @@ import (
"github.com/VictoriaMetrics/fastcache"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
)
@ -141,7 +140,13 @@ func (dl *diskLayer) node(owner common.Hash, path []byte, depth int) ([]byte, co
if blob := dl.nodes.Get(nil, key); len(blob) > 0 {
cleanNodeHitMeter.Mark(1)
cleanNodeReadMeter.Mark(int64(len(blob)))
return blob, crypto.Keccak256Hash(blob), nodeLoc{loc: locCleanCache, depth: depth}, nil
// Use the scheme-appropriate hasher (keccak256 for merkle,
// sha256-via-bintrie for binary trie).
h, err := dl.db.hasher(blob)
if err != nil {
return nil, common.Hash{}, nodeLoc{}, fmt.Errorf("hash cached trie node: %w", err)
}
return blob, h, nodeLoc{loc: locCleanCache, depth: depth}, nil
}
cleanNodeMissMeter.Mark(1)
}
@ -161,7 +166,11 @@ func (dl *diskLayer) node(owner common.Hash, path []byte, depth int) ([]byte, co
dl.nodes.Set(key, blob)
cleanNodeWriteMeter.Mark(int64(len(blob)))
}
return blob, crypto.Keccak256Hash(blob), nodeLoc{loc: locDiskLayer, depth: depth}, nil
h, err := dl.db.hasher(blob)
if err != nil {
return nil, common.Hash{}, nodeLoc{}, fmt.Errorf("hash disk trie node: %w", err)
}
return blob, h, nodeLoc{loc: locDiskLayer, depth: depth}, nil
}
// account directly retrieves the account RLP associated with a particular
@ -199,13 +208,15 @@ func (dl *diskLayer) account(hash common.Hash, depth int) ([]byte, error) {
// If the layer is being generated, ensure the requested account has
// already been covered by the generator.
codec := dl.db.flatCodec
marker := dl.genMarker()
if marker != nil && bytes.Compare(hash.Bytes(), marker) > 0 {
if marker != nil && codec.MarkerCompare(hash.Bytes(), marker) > 0 {
return nil, errNotCoveredYet
}
// Try to retrieve the account from the memory cache
cacheKey := codec.AccountCacheKey(hash)
if dl.states != nil {
if blob, found := dl.states.HasGet(nil, hash[:]); found {
if blob, found := dl.states.HasGet(nil, cacheKey); found {
cleanStateHitMeter.Mark(1)
cleanStateReadMeter.Mark(int64(len(blob)))
@ -219,7 +230,7 @@ func (dl *diskLayer) account(hash common.Hash, depth int) ([]byte, error) {
cleanStateMissMeter.Mark(1)
}
// Try to retrieve the account from the disk.
blob := rawdb.ReadAccountSnapshot(dl.db.diskdb, hash)
blob := codec.ReadAccount(dl.db.diskdb, hash)
// Store the resolved data in the clean cache. The background buffer flusher
// may also write to the clean cache concurrently, but two writers cannot
@ -227,7 +238,7 @@ func (dl *diskLayer) account(hash common.Hash, depth int) ([]byte, error) {
// it will be found in the frozen buffer, eliminating the need to check the
// database.
if dl.states != nil {
dl.states.Set(hash[:], blob)
dl.states.Set(cacheKey, blob)
cleanStateWriteMeter.Mark(int64(len(blob)))
}
if len(blob) == 0 {
@ -276,14 +287,27 @@ func (dl *diskLayer) storage(accountHash, storageHash common.Hash, depth int) ([
// If the layer is being generated, ensure the requested storage slot
// has already been covered by the generator.
key := storageKeySlice(accountHash, storageHash)
//
// The codec derives the scheme-appropriate marker comparison key:
// merkle uses the 64-byte (accountHash||storageHash) concatenation;
// bintrie uses the 32-byte storageHash directly (which is the full
// stem||offset key matching the bintrie generator's 32-byte marker).
// Pre-A4 this always used the 64-byte shape, which was fail-open
// for bintrie because the zero accountHash sorts before any
// sha256-derived marker byte.
codec := dl.db.flatCodec
markerKey := codec.StorageMarkerKey(accountHash, storageHash)
marker := dl.genMarker()
if marker != nil && bytes.Compare(key, marker) > 0 {
if marker != nil && codec.MarkerCompare(markerKey, marker) > 0 {
return nil, errNotCoveredYet
}
// Try to retrieve the storage slot from the memory cache
// Try to retrieve the storage slot from the memory cache. The codec
// decides the cache key shape so it can avoid colliding with account
// keys (relevant once the bintrie codec lands; for merkle this remains
// the historical 64-byte combined key).
cacheKey := codec.StorageCacheKey(accountHash, storageHash)
if dl.states != nil {
if blob, found := dl.states.HasGet(nil, key); found {
if blob, found := dl.states.HasGet(nil, cacheKey); found {
cleanStateHitMeter.Mark(1)
cleanStateReadMeter.Mark(int64(len(blob)))
@ -296,8 +320,8 @@ func (dl *diskLayer) storage(accountHash, storageHash common.Hash, depth int) ([
}
cleanStateMissMeter.Mark(1)
}
// Try to retrieve the account from the disk
blob := rawdb.ReadStorageSnapshot(dl.db.diskdb, accountHash, storageHash)
// Try to retrieve the storage slot from the disk
blob := codec.ReadStorage(dl.db.diskdb, accountHash, storageHash)
// Store the resolved data in the clean cache. The background buffer flusher
// may also write to the clean cache concurrently, but two writers cannot
@ -305,7 +329,7 @@ func (dl *diskLayer) storage(accountHash, storageHash common.Hash, depth int) ([
// it will be found in the frozen buffer, eliminating the need to check the
// database.
if dl.states != nil {
dl.states.Set(key, blob)
dl.states.Set(cacheKey, blob)
cleanStateWriteMeter.Mark(int64(len(blob)))
}
if len(blob) == 0 {
@ -491,7 +515,7 @@ func (dl *diskLayer) commit(bottom *diffLayer, force bool) (*diskLayer, error) {
// Freeze the live buffer and schedule background flushing
dl.frozen = combined
dl.frozen.flush(bottom.root, dl.db.diskdb, []ethdb.AncientWriter{dl.db.stateFreezer, dl.db.trienodeFreezer}, progress, dl.nodes, dl.states, bottom.stateID(), func() {
dl.frozen.flush(bottom.root, dl.db.diskdb, dl.db.flatCodec, []ethdb.AncientWriter{dl.db.stateFreezer, dl.db.trienodeFreezer}, progress, dl.nodes, dl.states, bottom.stateID(), func() {
// Resume the background generation if it's not completed yet.
// The generator is assumed to be available if the progress is
// not nil.
@ -530,6 +554,14 @@ func (dl *diskLayer) revert(h *stateHistory) (*diskLayer, error) {
if dl.id == 0 {
return nil, fmt.Errorf("%w: zero state id", errStateUnrecoverable)
}
// Bintrie flat state does not yet support revert. State history for
// bintrie carries keccak-keyed account/storage entries (the merkle
// shape), but the bintrie disk layout is per-stem and the merkle
// origin maps cannot be replayed onto it. Reorgs would silently
// produce wrong answers — fail loudly here so misuse is obvious.
if _, isBintrie := dl.db.flatCodec.(*bintrieFlatCodec); isBintrie {
return nil, errors.New("bintrie flat state revert is not supported")
}
// Apply the reverse state changes upon the current state. This must
// be done before holding the lock in order to access state in "this"
// layer.
@ -599,7 +631,9 @@ func (dl *diskLayer) revert(h *stateHistory) (*diskLayer, error) {
writeNodes(batch, nodes, dl.nodes)
// Provide the original values of modified accounts and storages for revert
writeStates(batch, progress, accounts, storages, dl.states)
if _, _, err := writeStates(batch, dl.db.flatCodec, progress, accounts, storages, dl.states); err != nil {
return nil, err
}
rawdb.WritePersistentStateID(batch, dl.id-1)
rawdb.WriteSnapshotRoot(batch, h.meta.parent)
if err := batch.Write(); err != nil {

316
triedb/pathdb/flat_codec.go Normal file
View file

@ -0,0 +1,316 @@
// Copyright 2026 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package pathdb
import (
"bytes"
"github.com/VictoriaMetrics/fastcache"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/ethdb"
)
// flatStateCodec abstracts the trie-specific aspects of flat-state storage:
// key derivation from (address, slot), persistence of account/storage entries
// to disk, clean-cache key disambiguation, the on-disk key prefixes/lengths
// the generator needs to set up its iterators, generation-marker handling,
// and flushing of buffered mutations.
//
// It mirrors the existing nodeHasher pattern (a hot, small interface plugged
// into the Database struct), and complements the Hasher interface from
// state-hasher-iface-2 which abstracts trie-side hashing/commit.
//
// Two implementations are provided:
// - merkleFlatCodec: keccak-keyed flat state, the historical MPT scheme.
// - bintrieFlatCodec: per-stem flat state for the unified binary trie.
// Wired into pathdb.Database.New when isVerkle is true.
//
// All methods MUST be safe for concurrent use; the codec is shared across
// goroutines (the disk layer's read path, the buffer flush path, and the
// background generator may all call into it simultaneously).
type flatStateCodec interface {
// AccountKey derives the flat-state lookup key for an account.
//
// For Merkle: returns keccak256(addr).
// For Bintrie: returns the full 32-byte tree key (stem || offset) for
// the BasicData leaf. Since BasicDataLeafKey is 0, the last byte is
// zero, but the result is a full key — callers use stemFromKey /
// offsetFromKey to decompose it.
AccountKey(addr common.Address) common.Hash
// StorageKey derives the flat-state lookup keys for a storage slot.
//
// The first return value carries the account-side hash (e.g.
// keccak256(addr) for Merkle, or zero for bintrie which has no per-account
// grouping). The second return value carries the slot-side hash
// (keccak256(slot) for Merkle, or the full bintrie key for bintrie).
//
// Read/Write methods receive the same pair, so the codec implementation
// is the only place that has to interpret them.
StorageKey(addr common.Address, slot common.Hash) (accountKey common.Hash, storageKey common.Hash)
// ReadAccount loads an account flat-state entry from persistent storage.
// Returns nil if the entry is not present.
ReadAccount(db ethdb.KeyValueReader, key common.Hash) []byte
// ReadStorage loads a storage flat-state entry from persistent storage.
// Returns nil if the entry is not present.
ReadStorage(db ethdb.KeyValueReader, accountKey common.Hash, storageKey common.Hash) []byte
// WriteAccount persists an account flat-state entry into the supplied batch.
WriteAccount(batch ethdb.Batch, key common.Hash, blob []byte)
// DeleteAccount removes an account flat-state entry via the supplied batch.
DeleteAccount(batch ethdb.Batch, key common.Hash)
// WriteStorage persists a storage flat-state entry into the supplied batch.
WriteStorage(batch ethdb.Batch, accountKey common.Hash, storageKey common.Hash, blob []byte)
// DeleteStorage removes a storage flat-state entry via the supplied batch.
DeleteStorage(batch ethdb.Batch, accountKey common.Hash, storageKey common.Hash)
// AccountCacheKey returns the byte key used in the disk-layer clean state
// cache (fastcache) for an account entry. The cache is shared between
// account and storage lookups, so codecs must ensure their key spaces are
// disjoint to avoid collisions.
AccountCacheKey(key common.Hash) []byte
// StorageCacheKey returns the byte key used in the disk-layer clean state
// cache (fastcache) for a storage entry. See AccountCacheKey for the
// disjointness requirement.
StorageCacheKey(accountKey common.Hash, storageKey common.Hash) []byte
// AccountPrefix returns the rawdb key prefix used by account entries on
// disk. Used by the generator to set up its account-range iterator.
AccountPrefix() []byte
// StoragePrefix returns the rawdb key prefix used by storage entries on
// disk. Used by the generator to set up its storage-range iterator.
StoragePrefix() []byte
// AccountKeyLength returns the expected total length (prefix + payload)
// of an on-disk account key. The generator uses this to filter spurious
// matches when iterating with a length-bounded iterator.
AccountKeyLength() int
// StorageKeyLength returns the expected total length (prefix + payload)
// of an on-disk storage key. See AccountKeyLength.
StorageKeyLength() int
// AccountPrefixSize returns the per-entry on-disk overhead used by the
// stateSet to estimate flush sizes. This is just the prefix length for
// merkle codecs; bintrie codecs may use a different convention.
AccountPrefixSize() int
// StoragePrefixSize returns the per-entry on-disk overhead for storage
// entries. See AccountPrefixSize.
StoragePrefixSize() int
// SplitMarker decomposes a generation progress marker into the account
// portion and the full marker. For Merkle the account part is the first
// 32 bytes; for bintrie both return values are the same 32-byte marker
// (the bintrie key space is single-tier, see StorageMarkerKey).
SplitMarker(marker []byte) (accountMarker []byte, fullMarker []byte)
// MarkerCompare compares a flat-state key against a generation progress
// marker. Returns the same semantics as bytes.Compare. Used by the
// disklayer.account/storage gating logic and by writeStates.
MarkerCompare(key []byte, marker []byte) int
// StorageMarkerKey returns the byte representation used to compare a
// (accountHash, storageHash) pair against the generator progress
// marker in disklayer.storage's generation-progress gate. Merkle
// uses the 64-byte concatenation (two-tier keying); bintrie uses
// the 32-byte storageHash directly (single-tier, stem||offset key
// space matching the bintrie generator's 32-byte marker).
StorageMarkerKey(accountHash, storageHash common.Hash) []byte
// Flush drains all pending mutations from the in-memory accountData and
// storageData maps into the supplied batch and updates the clean cache
// in lockstep. The codec controls iteration order, key derivation, and
// any aggregation that may be required (e.g. the bintrie codec must
// merge per-offset writes into per-stem read-modify-writes to avoid
// quadratic disk reads).
//
// Entries strictly past genMarker (per the codec's MarkerCompare
// semantics) are skipped because they will be regenerated by the
// background snapshot generator.
//
// Returns (account-entry count, storage-entry count) for metric
// reporting; the merkle codec reports one per map entry, while the
// bintrie codec reports one per logical offset write (so the metrics
// remain comparable across schemes). The error return lets a codec
// report a failed flush; the merkle codec always returns nil.
Flush(batch ethdb.Batch, genMarker []byte, accountData map[common.Hash][]byte, storageData map[common.Hash]map[common.Hash][]byte, clean *fastcache.Cache) (int, int, error)
}
// merkleFlatCodec implements flatStateCodec for the keccak-keyed MPT flat
// state scheme. All methods are thin wrappers over rawdb accessors and
// existing helpers; this codec preserves the historical behavior bit-for-bit.
//
// The type carries no fields, so a value holds no state of its own.
type merkleFlatCodec struct{}
// Compile-time check that *merkleFlatCodec satisfies flatStateCodec.
var _ flatStateCodec = (*merkleFlatCodec)(nil)
// AccountKey returns the keccak256 hash of the address bytes, which is the
// flat-state lookup key for an account under the merkle scheme.
func (c *merkleFlatCodec) AccountKey(addr common.Address) common.Hash {
return crypto.Keccak256Hash(addr.Bytes())
}
// StorageKey returns the pair (keccak256(addr), keccak256(slot)): the first
// value identifies the owning account, the second the slot within it.
func (c *merkleFlatCodec) StorageKey(addr common.Address, slot common.Hash) (common.Hash, common.Hash) {
return crypto.Keccak256Hash(addr.Bytes()), crypto.Keccak256Hash(slot.Bytes())
}
// ReadAccount loads the account entry stored under key by delegating to
// rawdb.ReadAccountSnapshot; the result is returned unmodified.
func (c *merkleFlatCodec) ReadAccount(db ethdb.KeyValueReader, key common.Hash) []byte {
return rawdb.ReadAccountSnapshot(db, key)
}
// ReadStorage loads the storage entry stored under (accountKey, storageKey)
// by delegating to rawdb.ReadStorageSnapshot; the result is returned
// unmodified.
func (c *merkleFlatCodec) ReadStorage(db ethdb.KeyValueReader, accountKey, storageKey common.Hash) []byte {
return rawdb.ReadStorageSnapshot(db, accountKey, storageKey)
}
// WriteAccount queues blob as the account entry for key in the supplied
// batch via rawdb.WriteAccountSnapshot.
func (c *merkleFlatCodec) WriteAccount(batch ethdb.Batch, key common.Hash, blob []byte) {
rawdb.WriteAccountSnapshot(batch, key, blob)
}
// DeleteAccount queues removal of the account entry for key in the supplied
// batch via rawdb.DeleteAccountSnapshot.
func (c *merkleFlatCodec) DeleteAccount(batch ethdb.Batch, key common.Hash) {
rawdb.DeleteAccountSnapshot(batch, key)
}
// WriteStorage queues blob as the storage entry for (accountKey, storageKey)
// in the supplied batch via rawdb.WriteStorageSnapshot.
func (c *merkleFlatCodec) WriteStorage(batch ethdb.Batch, accountKey, storageKey common.Hash, blob []byte) {
rawdb.WriteStorageSnapshot(batch, accountKey, storageKey, blob)
}
// DeleteStorage queues removal of the storage entry for
// (accountKey, storageKey) in the supplied batch via
// rawdb.DeleteStorageSnapshot.
func (c *merkleFlatCodec) DeleteStorage(batch ethdb.Batch, accountKey, storageKey common.Hash) {
rawdb.DeleteStorageSnapshot(batch, accountKey, storageKey)
}
// AccountCacheKey returns the clean-cache key for an account entry. The
// historical merkle clean cache uses the bare 32-byte account hash, so the
// key is simply the hash bytes with no prefix.
func (c *merkleFlatCodec) AccountCacheKey(key common.Hash) []byte {
// key is received by value (common.Hash is an array type), so key[:]
// slices this call's own copy of the hash. The returned slice therefore
// does not alias the caller's variable.
return key[:]
}
// StorageCacheKey returns the clean-cache key for a storage entry, built by
// storageKeySlice from the account and slot hashes. The same helper backs
// StorageMarkerKey, so both yield identical bytes for the same pair.
func (c *merkleFlatCodec) StorageCacheKey(accountKey, storageKey common.Hash) []byte {
return storageKeySlice(accountKey, storageKey)
}
// AccountPrefix returns rawdb.SnapshotAccountPrefix, the on-disk key prefix
// of merkle account entries. The package-level value is returned as-is (not
// copied), so callers should treat it as read-only.
func (c *merkleFlatCodec) AccountPrefix() []byte {
return rawdb.SnapshotAccountPrefix
}
// StoragePrefix returns rawdb.SnapshotStoragePrefix, the on-disk key prefix
// of merkle storage entries. The package-level value is returned as-is (not
// copied), so callers should treat it as read-only.
func (c *merkleFlatCodec) StoragePrefix() []byte {
return rawdb.SnapshotStoragePrefix
}
// AccountKeyLength returns the total length of an on-disk account key: the
// account prefix followed by one hash (the account hash).
func (c *merkleFlatCodec) AccountKeyLength() int {
return len(rawdb.SnapshotAccountPrefix) + common.HashLength
}
// StorageKeyLength returns the total length of an on-disk storage key: the
// storage prefix followed by two hashes (account hash, then slot hash).
func (c *merkleFlatCodec) StorageKeyLength() int {
return len(rawdb.SnapshotStoragePrefix) + 2*common.HashLength
}
// AccountPrefixSize returns the per-entry key overhead of an account entry,
// which for the merkle scheme is just the length of the account prefix.
func (c *merkleFlatCodec) AccountPrefixSize() int {
return len(rawdb.SnapshotAccountPrefix)
}
// StoragePrefixSize returns the per-entry key overhead of a storage entry,
// which for the merkle scheme is just the length of the storage prefix.
func (c *merkleFlatCodec) StoragePrefixSize() int {
return len(rawdb.SnapshotStoragePrefix)
}
// SplitMarker splits a generation progress marker into its account part and
// the untouched full marker.
//
// An empty (or nil) marker has no account part, so nil is returned for it.
// Otherwise the account part is the leading common.HashLength bytes of the
// marker; it shares the marker's backing array. A non-empty marker shorter
// than common.HashLength is not expected and makes the slice expression
// panic.
func (c *merkleFlatCodec) SplitMarker(marker []byte) ([]byte, []byte) {
	if len(marker) == 0 {
		return nil, marker
	}
	return marker[:common.HashLength], marker
}
// MarkerCompare orders key against marker with plain lexicographic byte
// comparison (bytes.Compare): negative if key sorts before marker, zero if
// equal, positive if after.
func (c *merkleFlatCodec) MarkerCompare(key []byte, marker []byte) int {
return bytes.Compare(key, marker)
}
// StorageMarkerKey returns the bytes compared against the generation marker
// for a storage slot. It is built by storageKeySlice, the same helper used
// by StorageCacheKey; the flatStateCodec contract describes the merkle form
// as the accountHash||storageHash concatenation.
func (c *merkleFlatCodec) StorageMarkerKey(accountHash, storageHash common.Hash) []byte {
return storageKeySlice(accountHash, storageHash)
}
// Flush writes the supplied account and storage mutations into batch using
// the merkle per-entry layout: one write (or delete) per accountData entry
// and one per storage slot. Each mutation is mirrored into the clean cache,
// when one is supplied, so later cache reads do not return stale data.
//
// An empty blob denotes a deletion: the entry is removed from disk and an
// empty value is stored in the clean cache.
//
// When genMarker is non-nil, entries that sort after it are skipped (the
// generator will fill them in). The entry sitting exactly at the marker is
// still written, as it would be skipped in the next generation cycle.
//
// Returns the number of account entries and storage slots that were
// flushed. The error is always nil for this codec.
func (c *merkleFlatCodec) Flush(batch ethdb.Batch, genMarker []byte, accountData map[common.Hash][]byte, storageData map[common.Hash]map[common.Hash][]byte, clean *fastcache.Cache) (int, int, error) {
	// mirror keeps the clean cache in step with the batch; it does nothing
	// when the caller runs without a clean cache.
	mirror := func(key, value []byte) {
		if clean != nil {
			clean.Set(key, value)
		}
	}
	// With a nil marker nothing is filtered out below.
	filtering := genMarker != nil

	var nAccounts, nSlots int
	for hash, data := range accountData {
		// Accounts sorting after the marker are not covered yet. An account
		// whose hash equals genMarker[:common.HashLength] never compares
		// greater, so it is still updated.
		if filtering && bytes.Compare(hash[:], genMarker) > 0 {
			continue
		}
		nAccounts++

		key := c.AccountCacheKey(hash)
		if len(data) != 0 {
			c.WriteAccount(batch, hash, data)
			mirror(key, data)
			continue
		}
		c.DeleteAccount(batch, hash)
		mirror(key, []byte{})
	}
	for accHash, slots := range storageData {
		// Skip the storage of any account not covered yet.
		if filtering && bytes.Compare(accHash[:], genMarker) > 0 {
			continue
		}
		// onMarker is set when the marker points into this very account; only
		// then does the slot half of the marker restrict which slots are
		// written.
		onMarker := filtering && bytes.Equal(accHash[:], genMarker[:common.HashLength])
		for slotHash, data := range slots {
			// Slots after genMarker[common.HashLength:] are not covered yet;
			// the slot exactly at the marker is still updated.
			if onMarker && bytes.Compare(slotHash[:], genMarker[common.HashLength:]) > 0 {
				continue
			}
			nSlots++

			key := c.StorageCacheKey(accHash, slotHash)
			if len(data) != 0 {
				c.WriteStorage(batch, accHash, slotHash, data)
				mirror(key, data)
				continue
			}
			c.DeleteStorage(batch, accHash, slotHash)
			mirror(key, []byte{})
		}
	}
	return nAccounts, nSlots, nil
}

View file

@ -0,0 +1,515 @@
// Copyright 2026 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package pathdb
import (
"bytes"
"fmt"
"github.com/VictoriaMetrics/fastcache"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/trie/bintrie"
)
// bintrieFlatCodec implements flatStateCodec for the binary trie using the
// stem-blob on-disk layout defined in stem_blob.go. Flat-state keys are
// full 32-byte bintrie keys in the "stem || offset" layout of the EIP-7864
// binary state tree. On disk, entries are keyed by the stem alone (the
// leading bytes returned by stemFromKey), and each value is a packed stem
// blob containing the subset of offsets that have been written at that
// stem.
//
// Unlike merkleFlatCodec (which carries no state), this codec
// holds a reference to the underlying key-value store so its Write/Delete
// methods can perform a read-modify-write on the existing stem blob
// before merging in the new (offset, value) pair. ethdb.Batch is
// write-only, so the batch passed to Write* cannot be used to fetch the
// current state of a stem.
//
// Pre-aggregation requirement: within a single flush pass, the caller
// must NOT issue two Write*/Delete* calls targeting the same stem. The
// codec reads the stem from the store (not from the in-flight batch), so a
// second write at the same stem would re-read the pre-flush state and
// clobber the first write. The codec's public surface area is designed
// around this assumption; the Flush method pre-aggregates per-stem
// writes so callers do not have to handle this manually.
//
// This codec is wired into pathdb.Database.New when isVerkle is true
// (see database.go). The leaf-production hook in binaryHasher emits
// per-offset writes via DrainStemWrites, which encodeBinary routes
// into the per-offset accountData map consumed by Flush.
type bintrieFlatCodec struct {
// db is the underlying key-value store used by applyWrites to read
// the current stem blob before merging in new (offset, value) pairs.
// It is always the pathdb Database's already-wrapped diskdb (the
// VerklePrefix-namespaced table) so reads and writes share the same
// on-disk key space. Only a reader is held: all mutations go through
// the batch handed to each call.
db ethdb.KeyValueReader
}
// newBintrieFlatCodec constructs a bintrieFlatCodec bound to the given
// key-value reader. The reader is stored as-is and is what applyWrites
// consults for the read half of its read-modify-write on stem blobs;
// writes still flow through the ethdb.Batch passed to each
// Write*/Delete* call.
func newBintrieFlatCodec(db ethdb.KeyValueReader) *bintrieFlatCodec {
return &bintrieFlatCodec{db: db}
}
// Compile-time check that *bintrieFlatCodec satisfies flatStateCodec.
var _ flatStateCodec = (*bintrieFlatCodec)(nil)
// bintrieCacheKeyPrefix is a one-byte prefix applied to all bintrie cache
// keys to keep them disjoint from merkle account keys (which are raw
// 32-byte hashes) and merkle storage keys (which are 64-byte
// accountHash||storageHash) in the shared clean-state fastcache. Without a
// prefix, a 32-byte merkle account hash and a 32-byte bintrie key would
// have the same length, so the two could collide on the same cache slot
// and return wrong data on read.
const bintrieCacheKeyPrefix byte = 0x01
// stemFromKey extracts the 31-byte stem from a 32-byte flat-state key.
// Bintrie keys follow the "stem || offset" layout (EIP-7864), so the stem
// is always bytes [0..30] and the byte at index 31 is the offset within
// the stem. Callers that use AccountKey()/StorageKey() followed by
// Read/Write never need to look at the offset themselves — the codec
// handles offset extraction internally.
//
// key is received by value, so the returned slice refers to this call's
// copy of the key rather than to the caller's variable.
func stemFromKey(key common.Hash) []byte {
return key[:bintrie.StemSize]
}
// offsetFromKey returns the offset byte of a 32-byte flat-state key, i.e.
// the single byte that immediately follows the stem (index
// bintrie.StemSize).
func offsetFromKey(key common.Hash) byte {
return key[bintrie.StemSize]
}
// ---------------------------------------------------------------------
// Key derivation
// ---------------------------------------------------------------------
// AccountKey returns the bintrie BasicData key for the given address, as
// computed by bintrie.GetBinaryTreeKeyBasicData and converted to a
// common.Hash.
// The result has the account's 31-byte stem in bytes [0..30] and offset 0
// (BasicDataLeafKey) in byte 31. The CodeHash leaf lives at the same stem
// with offset 1, so both leaves are stored in the same on-disk stem blob.
func (c *bintrieFlatCodec) AccountKey(addr common.Address) common.Hash {
return common.BytesToHash(bintrie.GetBinaryTreeKeyBasicData(addr))
}
// StorageKey derives the flat-state key pair for a storage slot.
//
// The first result (the "account key" in the merkle naming convention) is
// always the zero hash, because bintrie has no per-account grouping at the
// flat-state level. The second result is the full 32-byte slot key
// (stem || offset) produced by bintrie.GetBinaryTreeKeyStorageSlot. Callers
// pass both values back to the storage Read/Write methods, which recover
// the stem and offset from the second one.
func (c *bintrieFlatCodec) StorageKey(addr common.Address, slot common.Hash) (common.Hash, common.Hash) {
	var noAccount common.Hash // zero hash: no per-account grouping
	slotKey := common.BytesToHash(bintrie.GetBinaryTreeKeyStorageSlot(addr, slot[:]))
	return noAccount, slotKey
}
// ---------------------------------------------------------------------
// Disk reads
// ---------------------------------------------------------------------
// ReadAccount returns the value stored at the offset named by key (the
// final byte of key is the bintrie offset) inside the on-disk stem blob of
// key's stem. The blob is read from the db argument.
//
// The result is per-offset, not the whole stem blob. This matches
// ReadStorage and the shape of the entries held by the pathdb diff-layer
// buffer (keyed by the full stem||offset key), so a read that falls through
// from the buffer to disk sees the same representation on both sides.
//
// nil is returned when the stem has no blob on disk or the offset is not
// populated. A stem blob that fails to decode is logged and likewise
// reported as nil: the interface has no error channel, so corruption is
// surfaced as "entry absent", leaving the caller free to fall back to
// another source.
func (c *bintrieFlatCodec) ReadAccount(db ethdb.KeyValueReader, key common.Hash) []byte {
	stemBlob := rawdb.ReadBinTrieStem(db, stemFromKey(key))
	if len(stemBlob) == 0 {
		return nil
	}
	leaf, err := extractStemOffset(stemBlob, offsetFromKey(key))
	if err == nil {
		return leaf
	}
	log.Error("Corrupt bintrie stem blob in ReadAccount", "key", key, "err", err)
	return nil
}
// ReadStorage returns the value stored at the storage slot's offset within
// its stem, or nil if the stem has no blob on disk or the offset is not
// populated. Like ReadAccount, it extracts a single offset from the on-disk
// stem blob read through the db argument. A stem blob that fails to decode
// is logged and treated as absent.
//
// The first parameter (the account key) is ignored; only storageKey, the
// full stem||offset key, is needed to locate the value.
func (c *bintrieFlatCodec) ReadStorage(db ethdb.KeyValueReader, _ common.Hash, storageKey common.Hash) []byte {
	stemBlob := rawdb.ReadBinTrieStem(db, stemFromKey(storageKey))
	if len(stemBlob) == 0 {
		return nil
	}
	leaf, err := extractStemOffset(stemBlob, offsetFromKey(storageKey))
	if err == nil {
		return leaf
	}
	log.Error("Corrupt bintrie stem blob in ReadStorage", "key", storageKey, "err", err)
	return nil
}
// ---------------------------------------------------------------------
// Disk writes
// ---------------------------------------------------------------------
// WriteAccount merges an account entry into the stem blob of key's stem.
//
// blob is expected to be the two-slot payload BasicData || CodeHash; it is
// split by splitAccountBlob into one write per offset so both leaves are
// applied in a single read-modify-write of the stem. A nil or empty blob
// clears both offsets instead, and the stem key is deleted entirely if no
// offsets remain set afterwards.
//
// There is no error return: a payload of the wrong length or a failed merge
// is reported through log.Crit.
// NOTE(review): go-ethereum's log.Crit is expected to terminate the
// process, so execution should not continue past either call — confirm.
func (c *bintrieFlatCodec) WriteAccount(batch ethdb.Batch, key common.Hash, blob []byte) {
	pending, splitErr := splitAccountBlob(blob)
	if splitErr != nil {
		log.Crit("bintrie WriteAccount: split failed", "key", key, "err", splitErr)
	}
	_, applyErr := c.applyWrites(batch, stemFromKey(key), pending)
	if applyErr != nil {
		log.Crit("bintrie WriteAccount: apply failed", "key", key, "err", applyErr)
	}
}
// DeleteAccount clears the BasicData and CodeHash offsets at the account's
// stem. No other offset at that stem is part of the write set, so anything
// else stored there (e.g. header storage slots) is left alone; callers that
// want a full account wipe must remove storage separately.
//
// A failed merge is reported through log.Crit.
func (c *bintrieFlatCodec) DeleteAccount(batch ethdb.Batch, key common.Hash) {
	stem := stemFromKey(key)
	// A nil value marks the offset as cleared.
	clears := []stemOffsetValue{
		{Offset: bintrie.BasicDataLeafKey, Value: nil},
		{Offset: bintrie.CodeHashLeafKey, Value: nil},
	}
	_, err := c.applyWrites(batch, stem, clears)
	if err != nil {
		log.Crit("bintrie DeleteAccount: apply failed", "key", key, "err", err)
	}
}
// WriteStorage merges a single storage-slot value into the stem blob of
// storageKey's stem, at storageKey's offset.
//
// blob must be exactly stemBlobValueSize bytes long; any other length is a
// caller bug and is reported through log.Crit, as is a failed merge.
//
// The first parameter (the account key) is ignored; storageKey alone
// carries the stem and the offset.
func (c *bintrieFlatCodec) WriteStorage(batch ethdb.Batch, _ common.Hash, storageKey common.Hash, blob []byte) {
	if got := len(blob); got != stemBlobValueSize {
		log.Crit("bintrie WriteStorage: wrong value length", "key", storageKey, "len", got, "want", stemBlobValueSize)
	}
	update := stemOffsetValue{Offset: offsetFromKey(storageKey), Value: blob}
	_, err := c.applyWrites(batch, stemFromKey(storageKey), []stemOffsetValue{update})
	if err != nil {
		log.Crit("bintrie WriteStorage: apply failed", "key", storageKey, "err", err)
	}
}
// DeleteStorage clears the single offset named by storageKey at its stem.
// If the stem has no other populated offsets afterwards, the stem key is
// removed from disk entirely. A failed merge is reported through log.Crit.
//
// The first parameter (the account key) is ignored.
func (c *bintrieFlatCodec) DeleteStorage(batch ethdb.Batch, _ common.Hash, storageKey common.Hash) {
	// A nil value marks the offset as cleared.
	clear := stemOffsetValue{Offset: offsetFromKey(storageKey), Value: nil}
	_, err := c.applyWrites(batch, stemFromKey(storageKey), []stemOffsetValue{clear})
	if err != nil {
		log.Crit("bintrie DeleteStorage: apply failed", "key", storageKey, "err", err)
	}
}
// applyWrites performs a read-modify-write on one stem. It reads the
// current stem blob through the codec's bound reader, merges the supplied
// (offset, value) pairs into it with mergeStemBlob, and queues the outcome
// on the batch: a write of the merged blob, or a delete of the stem key
// when the merge result is nil. All four Write/Delete methods go through
// here so the policy (nil value clears an offset, empty result deletes the
// stem) stays consistent.
//
// Returns the merged blob, or nil if the stem was deleted, so callers such
// as Flush can repopulate the clean cache without another disk read. A
// merge failure is returned wrapped with the stem for context, and nothing
// is queued on the batch in that case.
//
// Important: the read comes from c.db, NOT from the batch. A second call
// for the same stem before the batch is written would re-read the old
// on-disk state and overwrite the first call's result, so callers must
// aggregate all writes for a stem into one call.
func (c *bintrieFlatCodec) applyWrites(batch ethdb.Batch, stem []byte, writes []stemOffsetValue) ([]byte, error) {
	current := rawdb.ReadBinTrieStem(c.db, stem)
	updated, err := mergeStemBlob(current, writes)
	switch {
	case err != nil:
		return nil, fmt.Errorf("bintrie stem %x: %w", stem, err)
	case updated == nil:
		// No offsets left at this stem: drop the key altogether.
		rawdb.DeleteBinTrieStem(batch, stem)
		return nil, nil
	default:
		rawdb.WriteBinTrieStem(batch, stem, updated)
		return updated, nil
	}
}
// splitAccountBlob turns the account payload given to WriteAccount into
// the two per-offset writes it stands for: one at
// bintrie.BasicDataLeafKey and one at bintrie.CodeHashLeafKey.
//
// Accepted inputs:
//   - a nil or zero-length blob, meaning "clear both offsets" (both
//     writes carry a nil value);
//   - a blob of exactly 2*stemBlobValueSize bytes laid out as
//     BasicData || CodeHash, which is split down the middle.
//
// Any other length yields an error. The returned values alias blob.
func splitAccountBlob(blob []byte) ([]stemOffsetValue, error) {
	var (
		want      = 2 * stemBlobValueSize
		basicData []byte
		codeHash  []byte
	)
	switch len(blob) {
	case 0:
		// Both halves stay nil: clear the two account offsets.
	case want:
		basicData, codeHash = blob[:stemBlobValueSize], blob[stemBlobValueSize:]
	default:
		return nil, fmt.Errorf("account blob len %d, want %d (BasicData || CodeHash)", len(blob), want)
	}
	return []stemOffsetValue{
		{Offset: bintrie.BasicDataLeafKey, Value: basicData},
		{Offset: bintrie.CodeHashLeafKey, Value: codeHash},
	}, nil
}
// ---------------------------------------------------------------------
// Clean-cache keys
// ---------------------------------------------------------------------
// AccountCacheKey builds the key under which a bintrie leaf is kept in
// the shared clean state cache: the single byte bintrieCacheKeyPrefix
// followed by the complete 32-byte (stem || offset) key.
//
// The prefix byte makes the result 1+common.HashLength bytes long, which
// keeps bintrie entries apart from merkle account keys (32 bytes) and
// merkle storage keys (64 bytes) living in the same cache.
//
// The whole key — not just the stem — is embedded on purpose: cached
// values are per-offset leaves, so two offsets of one stem (for example
// BasicData and CodeHash) need distinct cache slots, otherwise a lookup
// for one could be answered with the other's value.
//
// Layout of the result: byte 0 is the prefix, bytes 1..31 the stem and
// byte 32 the offset.
func (c *bintrieFlatCodec) AccountCacheKey(key common.Hash) []byte {
	cacheKey := make([]byte, 0, 1+common.HashLength)
	cacheKey = append(cacheKey, bintrieCacheKeyPrefix)
	return append(cacheKey, key[:]...)
}
// StorageCacheKey builds the clean-cache key for a storage entry. It
// has the same shape as the key produced by AccountCacheKey: the
// bintrieCacheKeyPrefix byte followed by the full 32-byte storage key.
// Because that storage key already carries (stem || offset), every slot
// of a stem ends up with its own cache entry.
//
// The account key parameter is ignored (see StorageKey).
func (c *bintrieFlatCodec) StorageCacheKey(_ common.Hash, storageKey common.Hash) []byte {
	const prefixLen = 1
	cacheKey := make([]byte, prefixLen+common.HashLength)
	cacheKey[0] = bintrieCacheKeyPrefix
	copy(cacheKey[prefixLen:], storageKey[:])
	return cacheKey
}
// ---------------------------------------------------------------------
// Generator iterator configuration
// ---------------------------------------------------------------------
// AccountPrefix reports the rawdb key prefix under which bintrie
// flat-state entries (stem blobs) are stored, rawdb.BinTrieStemPrefix.
// It is the prefix the generator iterator is meant to walk when it
// populates the flat state from an existing bintrie.
func (c *bintrieFlatCodec) AccountPrefix() []byte {
	stemPrefix := rawdb.BinTrieStemPrefix
	return stemPrefix
}
// StoragePrefix reports rawdb.BinTrieStemPrefix, exactly like
// AccountPrefix. Bintrie flat state keeps account and storage data
// together inside stem blobs in one namespace, so there is no separate
// storage prefix; the bintrie generator (generate_bintrie.go) is
// described as using a single iterator over this prefix instead of the
// merkle generator's account-then-storage walk.
func (c *bintrieFlatCodec) StoragePrefix() []byte {
	stemPrefix := rawdb.BinTrieStemPrefix
	return stemPrefix
}
// AccountKeyLength reports how long an on-disk stem key is expected to
// be: the length of rawdb.BinTrieStemPrefix plus bintrie.StemSize. With
// the one-byte prefix and 31-byte stem used by this layout that comes
// to 32 bytes.
func (c *bintrieFlatCodec) AccountKeyLength() int {
	prefixLen := len(rawdb.BinTrieStemPrefix)
	return prefixLen + bintrie.StemSize
}
// StorageKeyLength reports the same value as AccountKeyLength (prefix
// length plus bintrie.StemSize): storage lives in the same stem
// namespace as accounts, so its on-disk keys have the same shape.
func (c *bintrieFlatCodec) StorageKeyLength() int {
	prefixLen := len(rawdb.BinTrieStemPrefix)
	return prefixLen + bintrie.StemSize
}
// AccountPrefixSize reports the per-entry key overhead that the
// stateSet adds when estimating flush sizes. For bintrie entries this
// is the length of rawdb.BinTrieStemPrefix.
func (c *bintrieFlatCodec) AccountPrefixSize() int {
	overhead := len(rawdb.BinTrieStemPrefix)
	return overhead
}
// StoragePrefixSize reports the same overhead as AccountPrefixSize: the
// length of rawdb.BinTrieStemPrefix.
func (c *bintrieFlatCodec) StoragePrefixSize() int {
	overhead := len(rawdb.BinTrieStemPrefix)
	return overhead
}
// ---------------------------------------------------------------------
// Generation progress marker
// ---------------------------------------------------------------------
// SplitMarker breaks a generation progress marker into its "account"
// and "full" parts. A bintrie marker is a single 32-byte
// (stem || offset) key rather than the merkle two-tier
// account-then-storage marker, so there is nothing to cut: for a
// non-empty marker both results are the marker itself (the same
// slice, not copies).
//
// For an empty marker the account part is nil and the full part is the
// marker as given (nil stays nil, an empty slice stays an empty slice).
func (c *bintrieFlatCodec) SplitMarker(marker []byte) ([]byte, []byte) {
	if len(marker) > 0 {
		return marker, marker
	}
	return nil, marker
}
// MarkerCompare orders a flat-state key relative to a progress marker.
// The result follows bytes.Compare: negative when key sorts before
// marker, zero when they are equal, positive when key sorts after it.
// Bintrie keys (31-byte stems or 32-byte full keys) are ordered
// lexicographically, so a plain byte-wise comparison is sufficient.
func (c *bintrieFlatCodec) MarkerCompare(key []byte, marker []byte) int {
	order := bytes.Compare(key, marker)
	return order
}
// StorageMarkerKey yields the bytes to compare against the generator
// marker for a storage entry: the 32 bytes of storageHash, unchanged.
// In the bintrie layout the storage hash already is the full
// (stem || offset) key — bintrieFlatCodec.StorageKey is documented as
// returning (zeroHash, fullKey) — so it lines up directly with the
// 32-byte marker. The account hash parameter is ignored; building the
// merkle-style 64-byte combined key here would not order correctly.
func (c *bintrieFlatCodec) StorageMarkerKey(_ common.Hash, storageHash common.Hash) []byte {
	markerKey := storageHash[:]
	return markerKey
}
// Flush writes the buffered flat-state entries in accountData and
// storageData to batch using the bintrie per-stem layout.
//
// Input shape: every map key is a 32-byte (stem || offset) key as
// produced by AccountKey/StorageKey, and every value is the leaf to
// store at that offset, or nil to clear it. Bintrie entries are
// expected to arrive on accountData, but the inner maps of storageData
// are walked the same way so either layout works; the outer
// storageData key is not used.
//
// Marker filtering: when genMarker is non-nil, entries whose full key
// sorts strictly after it are skipped (and not counted). The comparison
// uses the full 32-byte key because the bintrie marker is itself one.
//
// Aggregation: entries are grouped by stem and each stem receives a
// single applyWrites call. applyWrites reads the pre-flush state from
// disk rather than from the batch, so issuing one call per stem is what
// keeps several writes to the same stem from clobbering each other.
//
// Cache: if clean is non-nil, each written offset is stored in it under
// AccountCacheKey(fullKey). A cleared offset is cached as an empty
// value; untouched offsets keep whatever entry they already had.
//
// The two counts returned are the number of entries taken from
// accountData and from storageData respectively. If a stem update
// fails, the error is returned (wrapped) together with the counts
// gathered so far, and the remaining stems are not processed.
func (c *bintrieFlatCodec) Flush(batch ethdb.Batch, genMarker []byte, accountData map[common.Hash][]byte, storageData map[common.Hash]map[common.Hash][]byte, clean *fastcache.Cache) (int, int, error) {
	// stemBatch gathers everything bound for one stem. keys[i] is the
	// full key that produced writes[i]; keeping it around spares the
	// cache loop from rebuilding keys out of (stem, offset).
	type stemBatch struct {
		keys   []common.Hash
		writes []stemOffsetValue
	}
	// Fixed-size arrays are comparable, which lets the stem itself act
	// as the grouping key.
	perStem := make(map[[bintrie.StemSize]byte]*stemBatch)

	// covered reports whether an entry should be flushed given the
	// generation marker: everything when there is no marker, otherwise
	// only keys at or before it.
	covered := func(key common.Hash) bool {
		return genMarker == nil || bytes.Compare(key[:], genMarker) <= 0
	}
	// enqueue files one (key, value) pair under the stem it belongs to.
	enqueue := func(key common.Hash, value []byte) {
		var id [bintrie.StemSize]byte
		copy(id[:], key[:bintrie.StemSize])

		sb := perStem[id]
		if sb == nil {
			sb = new(stemBatch)
			perStem[id] = sb
		}
		sb.keys = append(sb.keys, key)
		sb.writes = append(sb.writes, stemOffsetValue{Offset: key[bintrie.StemSize], Value: value})
	}

	var accounts, slots int
	for key, value := range accountData {
		if !covered(key) {
			continue
		}
		accounts++
		enqueue(key, value)
	}
	for _, perAccount := range storageData {
		for key, value := range perAccount {
			if !covered(key) {
				continue
			}
			slots++
			enqueue(key, value)
		}
	}

	// One read-modify-write per stem, followed by the per-offset cache
	// refresh for that stem.
	for _, sb := range perStem {
		stem := sb.keys[0][:bintrie.StemSize]
		if _, err := c.applyWrites(batch, stem, sb.writes); err != nil {
			return accounts, slots, fmt.Errorf("bintrie Flush: %w", err)
		}
		if clean == nil {
			continue
		}
		for i := range sb.keys {
			cached := sb.writes[i].Value
			if cached == nil {
				// Cache an explicit empty value for cleared offsets.
				cached = []byte{}
			}
			clean.Set(c.AccountCacheKey(sb.keys[i]), cached)
		}
	}
	return accounts, slots, nil
}

View file

@ -0,0 +1,468 @@
// Copyright 2026 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package pathdb
import (
"bytes"
"testing"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/trie/bintrie"
)
// newTestBintrieCodec creates a fresh in-memory database and a
// bintrieFlatCodec bound to it. Both are returned so a test can issue
// codec calls and also inspect or commit to the backing store directly.
func newTestBintrieCodec(t *testing.T) (*bintrieFlatCodec, ethdb.Database) {
	t.Helper()

	store := rawdb.NewMemoryDatabase()
	return newBintrieFlatCodec(store), store
}
// flushBatch commits batch and aborts the calling test if the commit
// fails. Tests call it after every codec write: applyWrites merges
// against what is in the store, not against pending batch contents, so
// a write only becomes visible to the next codec call once committed.
func flushBatch(t *testing.T, batch interface{ Write() error }) {
	t.Helper()

	err := batch.Write()
	if err == nil {
		return
	}
	t.Fatalf("batch write: %v", err)
}
// TestBintrieCodecAccountRoundTrip writes an account through
// WriteAccount as a two-slot BasicData||CodeHash blob, commits it, and
// then reads both halves back individually with ReadAccount. ReadAccount
// is keyed per offset and returns a single 32-byte leaf, so each half is
// fetched under its own key.
func TestBintrieCodecAccountRoundTrip(t *testing.T) {
	codec, db := newTestBintrieCodec(t)

	var (
		addr      = common.HexToAddress("0x1111111111111111111111111111111111111111")
		basicData = bytes.Repeat([]byte{0xAB}, stemBlobValueSize)
		codeHash  = bytes.Repeat([]byte{0xCD}, stemBlobValueSize)
	)
	payload := make([]byte, 0, len(basicData)+len(codeHash))
	payload = append(payload, basicData...)
	payload = append(payload, codeHash...)

	batch := db.NewBatch()
	codec.WriteAccount(batch, codec.AccountKey(addr), payload)
	flushBatch(t, batch)

	// codec.AccountKey(addr) addresses BasicData (offset 0); the
	// CodeHash key shares that stem and uses offset 1.
	var (
		basicKey = codec.AccountKey(addr)
		codeKey  = common.BytesToHash(bintrie.GetBinaryTreeKeyCodeHash(addr))
	)
	if gotBasic := codec.ReadAccount(db, basicKey); !bytes.Equal(gotBasic, basicData) {
		t.Fatalf("BasicData read: got %x, want %x", gotBasic, basicData)
	}
	if gotCode := codec.ReadAccount(db, codeKey); !bytes.Equal(gotCode, codeHash) {
		t.Fatalf("CodeHash read: got %x, want %x", gotCode, codeHash)
	}
}
// TestBintrieCodecStorageRoundTrip stores one storage slot with
// WriteStorage, commits it, and checks that ReadStorage — which picks
// the slot's offset out of the stem blob itself — returns the same
// value.
func TestBintrieCodecStorageRoundTrip(t *testing.T) {
	codec, db := newTestBintrieCodec(t)

	var (
		addr  = common.HexToAddress("0x2222222222222222222222222222222222222222")
		slot  = common.HexToHash("0x0000000000000000000000000000000000000000000000000000000000000042")
		value = bytes.Repeat([]byte{0x77}, stemBlobValueSize)
	)
	acctKey, storageKey := codec.StorageKey(addr, slot)

	batch := db.NewBatch()
	codec.WriteStorage(batch, acctKey, storageKey, value)
	flushBatch(t, batch)

	if got := codec.ReadStorage(db, acctKey, storageKey); !bytes.Equal(got, value) {
		t.Fatalf("ReadStorage: got %x, want %x", got, value)
	}
}
// TestBintrieCodecMultipleWritesSameStem checks that two consecutive
// writes hitting DIFFERENT offsets of one stem are both preserved. This
// is the ordinary account-update pattern: BasicData and CodeHash are
// written at a stem, and later a header storage slot is written at the
// same stem.
//
// The codec's read-modify-write reads from the store and not from the
// pending batch, so each write is committed before the next one is
// issued; the test exercises exactly that per-call contract.
func TestBintrieCodecMultipleWritesSameStem(t *testing.T) {
	codec, db := newTestBintrieCodec(t)

	var (
		addr         = common.HexToAddress("0x3333333333333333333333333333333333333333")
		slot         = common.HexToHash("0x0000000000000000000000000000000000000000000000000000000000000000")
		basicData    = bytes.Repeat([]byte{0xAA}, stemBlobValueSize)
		codeHash     = bytes.Repeat([]byte{0xBB}, stemBlobValueSize)
		storageValue = bytes.Repeat([]byte{0xCC}, stemBlobValueSize)
	)

	// Step 1: the account itself (offsets 0 and 1 of the BasicData stem).
	accountBlob := make([]byte, 0, len(basicData)+len(codeHash))
	accountBlob = append(accountBlob, basicData...)
	accountBlob = append(accountBlob, codeHash...)

	accountBatch := db.NewBatch()
	codec.WriteAccount(accountBatch, codec.AccountKey(addr), accountBlob)
	flushBatch(t, accountBatch)

	// Step 2: header storage slot 0. Per EIP-7864 it sits at offset 64
	// of the SAME stem as BasicData, so this write has to merge into
	// the blob stored in step 1.
	acctKey, storageKey := codec.StorageKey(addr, slot)

	storageBatch := db.NewBatch()
	codec.WriteStorage(storageBatch, acctKey, storageKey, storageValue)
	flushBatch(t, storageBatch)

	// Every one of the three offsets must still be readable.
	var (
		basicKey = codec.AccountKey(addr)
		codeKey  = common.BytesToHash(bintrie.GetBinaryTreeKeyCodeHash(addr))
	)
	gotBasic := codec.ReadAccount(db, basicKey)
	if !bytes.Equal(gotBasic, basicData) {
		t.Fatalf("BasicData lost after storage write: got %x, want %x", gotBasic, basicData)
	}
	gotCode := codec.ReadAccount(db, codeKey)
	if !bytes.Equal(gotCode, codeHash) {
		t.Fatalf("CodeHash lost after storage write: got %x, want %x", gotCode, codeHash)
	}
	if gotStorage := codec.ReadStorage(db, acctKey, storageKey); !bytes.Equal(gotStorage, storageValue) {
		t.Fatalf("Storage: got %x, want %x", gotStorage, storageValue)
	}
}
// TestBintrieCodecDeleteAccount verifies that DeleteAccount clears only
// offsets 0 (BasicData) and 1 (CodeHash) at the account's stem, leaving
// any other offsets (e.g. header storage slots) at the same stem
// untouched. This mirrors BinaryTrie.DeleteAccount's intended semantics.
func TestBintrieCodecDeleteAccount(t *testing.T) {
	codec, db := newTestBintrieCodec(t)
	addr := common.HexToAddress("0x4444444444444444444444444444444444444444")

	// Populate account (offsets 0+1) and one header storage slot (offset 64).
	basicData := bytes.Repeat([]byte{0xAA}, stemBlobValueSize)
	codeHash := bytes.Repeat([]byte{0xBB}, stemBlobValueSize)
	// Build the blob in a fresh slice instead of appending onto
	// basicData: appending to an existing slice may reuse its backing
	// array, and the sibling tests copy first as well.
	blob := append(append([]byte{}, basicData...), codeHash...)

	batch := db.NewBatch()
	codec.WriteAccount(batch, codec.AccountKey(addr), blob)
	flushBatch(t, batch)

	storageValue := bytes.Repeat([]byte{0xCC}, stemBlobValueSize)
	slot := common.HexToHash("0x0000000000000000000000000000000000000000000000000000000000000000")
	acctKey, storageKey := codec.StorageKey(addr, slot)
	batch = db.NewBatch()
	codec.WriteStorage(batch, acctKey, storageKey, storageValue)
	flushBatch(t, batch)

	// Delete the account. Offsets 0 and 1 should be cleared; the
	// header storage slot at offset 64 should survive.
	batch = db.NewBatch()
	codec.DeleteAccount(batch, codec.AccountKey(addr))
	flushBatch(t, batch)

	basicKey := codec.AccountKey(addr)
	codeKey := common.BytesToHash(bintrie.GetBinaryTreeKeyCodeHash(addr))

	// Verify the underlying stem blob still exists (the storage slot
	// at offset 64 should have prevented a full delete).
	stemBlob := rawdb.ReadBinTrieStem(db, stemFromKey(basicKey))
	if len(stemBlob) == 0 {
		t.Fatal("stem blob was fully deleted; header storage should still be present")
	}
	// BasicData and CodeHash now read back as nil (offset cleared).
	if got := codec.ReadAccount(db, basicKey); got != nil {
		t.Fatalf("BasicData not cleared: %x", got)
	}
	if got := codec.ReadAccount(db, codeKey); got != nil {
		t.Fatalf("CodeHash not cleared: %x", got)
	}
	if got := codec.ReadStorage(db, acctKey, storageKey); !bytes.Equal(got, storageValue) {
		t.Fatalf("header storage lost after DeleteAccount: got %x, want %x", got, storageValue)
	}
}
// TestBintrieCodecDeleteLastOffsetRemovesKey checks that clearing the
// only populated offset of a stem removes the stem's on-disk key
// entirely instead of leaving a zero-length blob behind.
func TestBintrieCodecDeleteLastOffsetRemovesKey(t *testing.T) {
	codec, db := newTestBintrieCodec(t)

	var (
		addr  = common.HexToAddress("0x5555555555555555555555555555555555555555")
		slot  = common.HexToHash("0x0000000000000000000000000000000000000000000000000000000000000080")
		value = bytes.Repeat([]byte{0xDD}, stemBlobValueSize)
	)
	acctKey, storageKey := codec.StorageKey(addr, slot)

	// Store the slot and confirm it is readable.
	writeBatch := db.NewBatch()
	codec.WriteStorage(writeBatch, acctKey, storageKey, value)
	flushBatch(t, writeBatch)

	before := codec.ReadStorage(db, acctKey, storageKey)
	if !bytes.Equal(before, value) {
		t.Fatalf("pre-delete read: got %x, want %x", before, value)
	}

	// Remove it again.
	deleteBatch := db.NewBatch()
	codec.DeleteStorage(deleteBatch, acctKey, storageKey)
	flushBatch(t, deleteBatch)

	// The raw stem entry must have disappeared from the store ...
	if raw := rawdb.ReadBinTrieStem(db, stemFromKey(storageKey)); raw != nil {
		t.Fatalf("stem blob should be deleted, got %x", raw)
	}
	// ... and the codec-level read must come back nil as well.
	after := codec.ReadStorage(db, acctKey, storageKey)
	if after != nil {
		t.Fatalf("post-delete read: got %x, want nil", after)
	}
}
// TestBintrieCodecCacheKeysDisjoint covers two properties of the bintrie
// clean-cache keys:
//
//  1. For the same 32-byte hash the bintrie key differs from the merkle
//     account key and starts with bintrieCacheKeyPrefix, so the two
//     codecs cannot collide in a shared cache.
//  2. Two keys sharing a stem but differing in offset map to different
//     cache keys. Caching is per offset; if only the stem were embedded,
//     BasicData and CodeHash would share one slot and a lookup could
//     return the wrong value.
func TestBintrieCodecCacheKeysDisjoint(t *testing.T) {
	var (
		codec  = new(bintrieFlatCodec)
		merkle = new(merkleFlatCodec)

		// Feeding the same hash to both codecs would yield identical
		// keys if the bintrie codec did not add its prefix byte.
		hash = common.HexToHash("0xaabbccddeeff00112233445566778899aabbccddeeff00112233445566778899")
	)
	binKey := codec.AccountCacheKey(hash)
	if merkleKey := merkle.AccountCacheKey(hash); bytes.Equal(binKey, merkleKey) {
		t.Fatalf("bintrie and merkle cache keys collided: both are %x", binKey)
	}
	if binKey[0] != bintrieCacheKeyPrefix {
		t.Fatalf("bintrie cache key missing prefix byte: %x", binKey)
	}

	// Same stem, different final (offset) byte.
	basicKey, codeKey := hash, hash
	basicKey[31] = bintrie.BasicDataLeafKey
	codeKey[31] = bintrie.CodeHashLeafKey

	basicCacheKey := codec.AccountCacheKey(basicKey)
	if codeCacheKey := codec.AccountCacheKey(codeKey); bytes.Equal(basicCacheKey, codeCacheKey) {
		t.Fatalf("per-offset cache keys collided at same stem: %x", basicCacheKey)
	}
}
// TestBintrieCodecSplitMarker pins the single-tier marker handling of
// the bintrie codec. Merkle markers split into an (account,
// account+storage) pair; a bintrie marker is one 32-byte full key, so
// SplitMarker is expected to hand back the same bytes for both parts —
// and nil for both when the marker is nil.
func TestBintrieCodecSplitMarker(t *testing.T) {
	codec := &bintrieFlatCodec{}

	// Case 1: no marker at all.
	if acc, full := codec.SplitMarker(nil); acc != nil || full != nil {
		t.Fatalf("nil marker: acc=%v full=%v, want nil/nil", acc, full)
	}

	// Case 2: a 32-byte marker comes back unchanged in both positions.
	marker := bytes.Repeat([]byte{0xAA}, 32)
	acc, full := codec.SplitMarker(marker)
	if !(bytes.Equal(acc, marker) && bytes.Equal(full, marker)) {
		t.Fatalf("SplitMarker: acc=%x full=%x, want both %x", acc, full, marker)
	}
}
// TestBintrieCodecFlushAggregates verifies the per-stem aggregation that
// the codec's Flush method performs. Distinct offsets at the SAME stem
// should produce a single on-disk stem blob containing all of them after
// one Flush call — proving the codec collapses what would have been N
// read-modify-writes into one.
//
// Four offsets are written across two stems (3 + 1) so both the
// multi-offset and the single-offset paths are exercised in one test.
func TestBintrieCodecFlushAggregates(t *testing.T) {
	codec, db := newTestBintrieCodec(t)

	// Build a per-offset accountData map mimicking what encodeBinary
	// produces from a binaryHasher.DrainStemWrites: the keys are full
	// 32-byte (stem || offset) tuples and the values are 32-byte leaves.
	addr := common.HexToAddress("0xCafeBabeDeadBeef00112233445566778899aabb")
	stem := bintrie.GetBinaryTreeKey(addr, make([]byte, 32))[:bintrie.StemSize]
	basicData := bytes.Repeat([]byte{0xAA}, stemBlobValueSize)
	codeHash := bytes.Repeat([]byte{0xBB}, stemBlobValueSize)
	storageVal := bytes.Repeat([]byte{0xCC}, stemBlobValueSize)
	otherStem := bytes.Repeat([]byte{0x42}, bintrie.StemSize)
	otherVal := bytes.Repeat([]byte{0xDD}, stemBlobValueSize)

	mkKey := func(stem []byte, offset byte) common.Hash {
		var k common.Hash
		copy(k[:bintrie.StemSize], stem)
		k[bintrie.StemSize] = offset
		return k
	}
	accountData := map[common.Hash][]byte{
		mkKey(stem, bintrie.BasicDataLeafKey):      basicData,
		mkKey(stem, bintrie.CodeHashLeafKey):       codeHash,
		mkKey(stem, 64):                            storageVal, // header storage slot
		mkKey(otherStem, bintrie.BasicDataLeafKey): otherVal,
	}

	batch := db.NewBatch()
	accW, stoW, err := codec.Flush(batch, nil, accountData, nil, nil)
	if err != nil {
		// Stop here: without a successful flush the checks below would
		// only report misleading "missing blob" failures.
		t.Fatalf("Flush: %v", err)
	}
	flushBatch(t, batch)

	if accW != 4 {
		t.Errorf("account write count: got %d, want 4", accW)
	}
	if stoW != 0 {
		t.Errorf("storage write count: got %d, want 0 (no storage map)", stoW)
	}

	// All three offsets at `stem` should be readable from a single on-disk
	// blob; aggregation worked iff the second/third writes did not clobber
	// the first.
	blob := rawdb.ReadBinTrieStem(db, stem)
	if len(blob) == 0 {
		t.Fatal("stem blob missing after Flush")
	}
	for offset, want := range map[byte][]byte{
		bintrie.BasicDataLeafKey: basicData,
		bintrie.CodeHashLeafKey:  codeHash,
		64:                       storageVal,
	} {
		got, err := extractStemOffset(blob, offset)
		if err != nil {
			t.Fatalf("extract offset %d: %v", offset, err)
		}
		if !bytes.Equal(got, want) {
			t.Errorf("offset %d: got %x, want %x", offset, got, want)
		}
	}

	// The other stem should also have its single offset.
	otherBlob := rawdb.ReadBinTrieStem(db, otherStem)
	got, err := extractStemOffset(otherBlob, bintrie.BasicDataLeafKey)
	if err != nil {
		t.Fatalf("extract other stem BasicData: %v", err)
	}
	if !bytes.Equal(got, otherVal) {
		t.Errorf("other stem BasicData: got %x, want %x", got, otherVal)
	}
}
// TestBintrieCodecCrossFlushRMW verifies that writes to the SAME stem
// from DIFFERENT flush passes (simulating blocks N and N+1) correctly
// merge on disk. Flush 1 writes offsets 0+1 at the stem; Flush 2 writes
// offset 64 at the same stem. After both flushes, all three offsets
// must be readable — the second flush must not clobber the first.
//
// This is the regression test for cross-flush read-modify-write
// correctness, the core behavior of the per-stem codec layout.
func TestBintrieCodecCrossFlushRMW(t *testing.T) {
	codec, db := newTestBintrieCodec(t)

	stem := bytes.Repeat([]byte{0x99}, bintrie.StemSize)
	mkKey := func(offset byte) common.Hash {
		var k common.Hash
		copy(k[:bintrie.StemSize], stem)
		k[bintrie.StemSize] = offset
		return k
	}
	// flush runs one complete flush pass (Flush + batch commit) and
	// fails the test immediately if Flush reports an error, so a broken
	// flush is not misreported as lost data further down.
	flush := func(step string, data map[common.Hash][]byte) {
		t.Helper()
		batch := db.NewBatch()
		if _, _, err := codec.Flush(batch, nil, data, nil, nil); err != nil {
			t.Fatalf("%s: Flush failed: %v", step, err)
		}
		flushBatch(t, batch)
	}

	basicVal := bytes.Repeat([]byte{0xAA}, stemBlobValueSize)
	codeVal := bytes.Repeat([]byte{0xBB}, stemBlobValueSize)
	slotVal := bytes.Repeat([]byte{0xCC}, stemBlobValueSize)

	// Flush 1: write BasicData (offset 0) and CodeHash (offset 1).
	flush("flush 1", map[common.Hash][]byte{
		mkKey(bintrie.BasicDataLeafKey): basicVal,
		mkKey(bintrie.CodeHashLeafKey):  codeVal,
	})

	// Flush 2: write a header storage slot at offset 64 — same stem.
	flush("flush 2", map[common.Hash][]byte{
		mkKey(64): slotVal,
	})

	// After both flushes, all three offsets must be readable. Without
	// the read-modify-write, Flush 2 would overwrite the stem blob and
	// erase BasicData + CodeHash.
	if got := codec.ReadAccount(db, mkKey(bintrie.BasicDataLeafKey)); !bytes.Equal(got, basicVal) {
		t.Errorf("BasicData lost after second flush: got %x, want %x", got, basicVal)
	}
	if got := codec.ReadAccount(db, mkKey(bintrie.CodeHashLeafKey)); !bytes.Equal(got, codeVal) {
		t.Errorf("CodeHash lost after second flush: got %x, want %x", got, codeVal)
	}
	if got := codec.ReadAccount(db, mkKey(64)); !bytes.Equal(got, slotVal) {
		t.Errorf("header slot at offset 64 missing: got %x, want %x", got, slotVal)
	}
}
// TestBintrieCodecFlushDelete verifies that nil-valued entries in the
// accountData map clear the corresponding offset, and that clearing every
// populated offset at a stem removes the on-disk key entirely (matching
// the per-call DeleteStorage semantics tested elsewhere).
func TestBintrieCodecFlushDelete(t *testing.T) {
	codec, db := newTestBintrieCodec(t)

	stem := bytes.Repeat([]byte{0x77}, bintrie.StemSize)
	v0 := bytes.Repeat([]byte{0x01}, stemBlobValueSize)
	v1 := bytes.Repeat([]byte{0x02}, stemBlobValueSize)
	mkKey := func(offset byte) common.Hash {
		var k common.Hash
		copy(k[:bintrie.StemSize], stem)
		k[bintrie.StemSize] = offset
		return k
	}
	// flush runs one complete flush pass (Flush + batch commit) and
	// fails the test immediately if Flush reports an error, so a broken
	// flush is not misreported by the state checks that follow.
	flush := func(step string, data map[common.Hash][]byte) {
		t.Helper()
		batch := db.NewBatch()
		if _, _, err := codec.Flush(batch, nil, data, nil, nil); err != nil {
			t.Fatalf("%s: Flush failed: %v", step, err)
		}
		flushBatch(t, batch)
	}

	// Seed: write two offsets at one stem.
	flush("seed", map[common.Hash][]byte{
		mkKey(0): v0,
		mkKey(1): v1,
	})

	// Now flush a nil for offset 0 — only offset 1 should remain.
	flush("clear offset 0", map[common.Hash][]byte{mkKey(0): nil})

	blob := rawdb.ReadBinTrieStem(db, stem)
	// The error is deliberately ignored for the cleared offset: only
	// the absence of a value matters here, and whether extractStemOffset
	// reports an absent offset as an error is not visible in this file.
	if got, _ := extractStemOffset(blob, 0); got != nil {
		t.Errorf("offset 0 should be cleared, got %x", got)
	}
	got, err := extractStemOffset(blob, 1)
	if err != nil {
		t.Fatalf("extract offset 1: %v", err)
	}
	if !bytes.Equal(got, v1) {
		t.Errorf("offset 1 should survive, got %x want %x", got, v1)
	}

	// Clear the last remaining offset; the on-disk key should disappear.
	flush("clear offset 1", map[common.Hash][]byte{mkKey(1): nil})

	if raw := rawdb.ReadBinTrieStem(db, stem); raw != nil {
		t.Errorf("stem should be deleted, got %x", raw)
	}
}

View file

@ -17,8 +17,6 @@
package pathdb
import (
"bytes"
"github.com/VictoriaMetrics/fastcache"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
@ -71,64 +69,16 @@ func writeNodes(batch ethdb.Batch, nodes map[common.Hash]map[string]*trienode.No
// This function assumes the background generator is already terminated and states
// before the supplied marker has been correctly generated.
//
// The codec parameter abstracts the trie-specific persistence: merkleFlatCodec
// performs a per-entry rawdb write for each accountData/storageData entry,
// while bintrieFlatCodec aggregates per-offset writes into per-stem
// read-modify-writes. Either way, the genMarker filtering, cache update, and
// metric reporting all happen inside the codec — writeStates is just a thin
// dispatcher.
//
// TODO(rjl493456442) do we really need this generation marker? The state updates
// after the marker can also be written and will be fixed by generator later if
// it's outdated.
func writeStates(batch ethdb.Batch, genMarker []byte, accountData map[common.Hash][]byte, storageData map[common.Hash]map[common.Hash][]byte, clean *fastcache.Cache) (int, int) {
var (
accounts int
slots int
)
for addrHash, blob := range accountData {
// Skip any account not yet covered by the snapshot. The account
// at the generation marker position (addrHash == genMarker[:common.HashLength])
// should still be updated, as it would be skipped in the next
// generation cycle.
if genMarker != nil && bytes.Compare(addrHash[:], genMarker) > 0 {
continue
}
accounts += 1
if len(blob) == 0 {
rawdb.DeleteAccountSnapshot(batch, addrHash)
if clean != nil {
clean.Set(addrHash[:], nil)
}
} else {
rawdb.WriteAccountSnapshot(batch, addrHash, blob)
if clean != nil {
clean.Set(addrHash[:], blob)
}
}
}
for addrHash, storages := range storageData {
// Skip any account not covered yet by the snapshot
if genMarker != nil && bytes.Compare(addrHash[:], genMarker) > 0 {
continue
}
midAccount := genMarker != nil && bytes.Equal(addrHash[:], genMarker[:common.HashLength])
for storageHash, blob := range storages {
// Skip any storage slot not yet covered by the snapshot. The storage slot
// at the generation marker position (addrHash == genMarker[:common.HashLength]
// and storageHash == genMarker[common.HashLength:]) should still be updated,
// as it would be skipped in the next generation cycle.
if midAccount && bytes.Compare(storageHash[:], genMarker[common.HashLength:]) > 0 {
continue
}
slots += 1
key := storageKeySlice(addrHash, storageHash)
if len(blob) == 0 {
rawdb.DeleteStorageSnapshot(batch, addrHash, storageHash)
if clean != nil {
clean.Set(key, nil)
}
} else {
rawdb.WriteStorageSnapshot(batch, addrHash, storageHash, blob)
if clean != nil {
clean.Set(key, blob)
}
}
}
}
return accounts, slots
func writeStates(batch ethdb.Batch, codec flatStateCodec, genMarker []byte, accountData map[common.Hash][]byte, storageData map[common.Hash]map[common.Hash][]byte, clean *fastcache.Cache) (int, int, error) {
	// Marker filtering, persistence, cache maintenance and counting are
	// all codec-specific, so forward the call and relay its results.
	accounts, slots, err := codec.Flush(batch, genMarker, accountData, storageData, clean)
	return accounts, slots, err
}

View file

@ -93,6 +93,7 @@ type generator struct {
running bool // Flag indicating whether the background generation is running
db ethdb.KeyValueStore // Key-value store containing the snapshot data
codec flatStateCodec // Flat-state codec for key derivation, persistence, iterators
stats *generatorStats // Generation statistics used throughout the entire life cycle
abort chan chan struct{} // Notification channel to abort generating the snapshot in this layer
done chan struct{} // Notification channel when generation is done
@ -109,7 +110,11 @@ type generator struct {
// progress indicates the starting position for resuming snapshot generation.
// It must be provided even if generation is not allowed; otherwise, uncovered
// states may be exposed for serving.
func newGenerator(db ethdb.KeyValueStore, noBuild bool, progress []byte, stats *generatorStats) *generator {
//
// codec is the flat-state codec used for marker handling, prefix selection,
// persistence, and iterator construction. It must match the codec configured
// on the owning Database.
func newGenerator(db ethdb.KeyValueStore, codec flatStateCodec, noBuild bool, progress []byte, stats *generatorStats) *generator {
if stats == nil {
stats = &generatorStats{start: time.Now()}
}
@ -117,6 +122,7 @@ func newGenerator(db ethdb.KeyValueStore, noBuild bool, progress []byte, stats *
noBuild: noBuild,
progress: progress,
db: db,
codec: codec,
stats: stats,
abort: make(chan chan struct{}),
done: make(chan struct{}),
@ -124,6 +130,13 @@ func newGenerator(db ethdb.KeyValueStore, noBuild bool, progress []byte, stats *
}
// run starts the state snapshot generation in the background.
//
// The dispatch on codec type chooses between the merkle two-tier
// account/storage iteration (`generate`) and the bintrie single-tier
// stem iteration (`generateBintrie`). Both share the same lifecycle
// (g.running, g.abort, g.done) and the same progress journal format,
// so the only difference visible to callers of run/stop is which
// background routine is launched.
func (g *generator) run(root common.Hash) {
if g.noBuild {
log.Warn("Snapshot generation is not permitted")
@ -134,7 +147,11 @@ func (g *generator) run(root common.Hash) {
log.Warn("Paused the leftover generation cycle")
}
g.running = true
go g.generate(newGeneratorContext(root, g.progress, g.db))
if _, isBintrie := g.codec.(*bintrieFlatCodec); isBintrie {
go g.generateBintrie(newBintrieGeneratorContext(root, g.progress, g.db))
return
}
go g.generate(newGeneratorContext(root, g.progress, g.db, g.codec))
}
// stop terminates the background generation if it's actively running.
@ -168,15 +185,6 @@ func (g *generator) progressMarker() []byte {
return g.progress
}
// splitMarker is an internal helper which splits the generation progress marker
// into two parts.
func splitMarker(marker []byte) ([]byte, []byte) {
var accMarker []byte
if len(marker) > 0 {
accMarker = marker[:common.HashLength]
}
return accMarker, marker
}
// generateSnapshot regenerates a brand-new snapshot based on an existing state
// database and head block asynchronously. The snapshot is returned immediately
@ -188,7 +196,7 @@ func generateSnapshot(triedb *Database, root common.Hash, noBuild bool) *diskLay
genMarker = []byte{} // Initialized but empty!
)
dl := newDiskLayer(root, 0, triedb, nil, nil, newBuffer(triedb.config.WriteBufferSize, nil, nil, 0), nil)
dl.setGenerator(newGenerator(triedb.diskdb, noBuild, genMarker, stats))
dl.setGenerator(newGenerator(triedb.diskdb, triedb.flatCodec, noBuild, genMarker, stats))
if !noBuild {
dl.generator.run(root)
@ -198,13 +206,20 @@ func generateSnapshot(triedb *Database, root common.Hash, noBuild bool) *diskLay
}
// journalProgress persists the generator stats into the database to resume later.
func journalProgress(db ethdb.KeyValueWriter, marker []byte, stats *generatorStats) {
//
// It is a method on generator so it can stamp the journal entry with the
// active scheme (merkle vs. bintrie). loadGenerator uses that flag to
// discard journals from a different scheme rather than blindly resuming
// with an incompatible marker shape.
func (g *generator) journalProgress(db ethdb.KeyValueWriter, marker []byte, stats *generatorStats) {
// Write out the generator marker. Note it's a standalone disk layer generator
// which is not mixed with journal. It's ok if the generator is persisted while
// journal is not.
_, isBintrie := g.codec.(*bintrieFlatCodec)
entry := journalGenerator{
Done: marker == nil,
Marker: marker,
Done: marker == nil,
Marker: marker,
IsBintrie: isBintrie,
}
if stats != nil {
entry.Accounts = stats.accounts
@ -595,7 +610,7 @@ func (g *generator) checkAndFlush(ctx *generatorContext, current []byte) error {
// Persist the progress marker regardless of whether the batch is empty or not.
// It may happen that all the flat states in the database are correct, so the
// generator indeed makes progress even if there is nothing to commit.
journalProgress(ctx.batch, current, g.stats)
g.journalProgress(ctx.batch, current, g.stats)
// Flush out the database writes atomically
if err := ctx.batch.Write(); err != nil {
@ -633,12 +648,12 @@ func (g *generator) generateStorages(ctx *generatorContext, account common.Hash,
}(time.Now())
if delete {
rawdb.DeleteStorageSnapshot(ctx.batch, account, common.BytesToHash(key))
g.codec.DeleteStorage(ctx.batch, account, common.BytesToHash(key))
wipedStorageMeter.Mark(1)
return nil
}
if write {
rawdb.WriteStorageSnapshot(ctx.batch, account, common.BytesToHash(key), val)
g.codec.WriteStorage(ctx.batch, account, common.BytesToHash(key), val)
generatedStorageMeter.Mark(1)
} else {
recoveredStorageMeter.Mark(1)
@ -682,7 +697,7 @@ func (g *generator) generateAccounts(ctx *generatorContext, accMarker []byte) er
start := time.Now()
if delete {
rawdb.DeleteAccountSnapshot(ctx.batch, account)
g.codec.DeleteAccount(ctx.batch, account)
wipedAccountMeter.Mark(1)
accountWriteCounter.Inc(time.Since(start).Nanoseconds())
@ -708,7 +723,7 @@ func (g *generator) generateAccounts(ctx *generatorContext, accMarker []byte) er
} else {
data := types.SlimAccountRLP(acc)
dataLen = len(data)
rawdb.WriteAccountSnapshot(ctx.batch, account, data)
g.codec.WriteAccount(ctx.batch, account, data)
generatedAccountMeter.Mark(1)
}
g.stats.storage += common.StorageSize(1 + common.HashLength + dataLen)
@ -774,7 +789,7 @@ func (g *generator) generate(ctx *generatorContext) {
if len(g.progress) == 0 {
batch := g.db.NewBatch()
rawdb.WriteSnapshotRoot(batch, ctx.root)
journalProgress(batch, g.progress, g.stats)
g.journalProgress(batch, g.progress, g.stats)
if err := batch.Write(); err != nil {
log.Crit("Failed to write initialized state marker", "err", err)
}
@ -788,7 +803,7 @@ func (g *generator) generate(ctx *generatorContext) {
// processed twice by the generator(they are already processed in the
// last run) but it's fine.
var (
accMarker, _ = splitMarker(g.progress)
accMarker, _ = g.codec.SplitMarker(g.progress)
abort chan struct{}
)
if err := g.generateAccounts(ctx, accMarker); err != nil {
@ -807,7 +822,7 @@ func (g *generator) generate(ctx *generatorContext) {
// Snapshot fully generated, set the marker to nil.
// Note even there is nothing to commit, persist the
// generator anyway to mark the snapshot is complete.
journalProgress(ctx.batch, nil, g.stats)
g.journalProgress(ctx.batch, nil, g.stats)
if err := ctx.batch.Write(); err != nil {
log.Error("Failed to flush batch", "err", err)
abort = <-g.abort

View file

@ -0,0 +1,364 @@
// Copyright 2026 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package pathdb
import (
"bytes"
"errors"
"fmt"
"time"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/trie/bintrie"
"github.com/ethereum/go-ethereum/triedb/database"
)
// bintrieDiskStore is the node store the snapshot generator hands to the
// bintrie; it fills the role that diskStore fills for the merkle trie.
//
// The two stores only disagree on how NodeReader validates the requested
// state root. The merkle store hashes the persisted account-trie root with
// keccak256, whereas a bintrie root has to be deserialized as a binary node
// and rehashed with sha256, the bintrie's native hash function. Reusing the
// merkle store for a bintrie root would therefore always fail validation.
//
// After validation both stores serve trie nodes by path through
// rawdb.ReadAccountTrieNode: the path-based key space is shared between the
// schemes, since EIP-7864 unifies storage under accounts and the bintrie
// lives in the account-trie namespace.
type bintrieDiskStore struct {
	db ethdb.KeyValueStore // Key-value store holding the path-keyed trie nodes
}
// NodeReader checks that the bintrie root node persisted at the account-trie
// nil path hashes to stateRoot and, if so, returns a plain path-based
// diskReader over the same store. Only the validation differs from the
// merkle store; the returned reader type is identical.
//
// An error is returned when no root node is persisted, when the persisted
// blob cannot be deserialized, or when its hash differs from stateRoot.
func (s *bintrieDiskStore) NodeReader(stateRoot common.Hash) (database.NodeReader, error) {
	// The zero hash, EmptyBinaryHash and the legacy EmptyRootHash all mean
	// "no persisted root": none of them has an on-disk node, so they are
	// accepted without touching the disk.
	noPersistedRoot := stateRoot == (common.Hash{}) ||
		stateRoot == types.EmptyBinaryHash ||
		stateRoot == types.EmptyRootHash
	if noPersistedRoot {
		return &diskReader{s.db}, nil
	}
	rootBlob := rawdb.ReadAccountTrieNode(s.db, nil)
	if len(rootBlob) == 0 {
		return nil, fmt.Errorf("bintrie state %x is not available (empty root node)", stateRoot)
	}
	// Rebuild the root node from its serialized form; its Hash() is then
	// compared against the requested state root.
	node, err := bintrie.DeserializeNode(rootBlob, 0)
	if err != nil {
		return nil, fmt.Errorf("bintrie state %x: deserialize root: %w", stateRoot, err)
	}
	have := node.Hash()
	if have != stateRoot {
		return nil, fmt.Errorf("bintrie state %x is not available (have %x)", stateRoot, have)
	}
	return &diskReader{s.db}, nil
}
// bintrieGeneratorContext carries the per-cycle state of one bintrie
// snapshot generation run.
//
// The merkle generatorContext manages two holdable iterators over the
// on-disk account/storage prefixes. The bintrie generator instead walks the
// trie itself, so this context stays small: the target root, a write batch,
// and one resume marker, which is the 32-byte bintrie key (stem || offset)
// where the previous run stopped.
//
// A fresh context is created on every generator restart, mirroring the
// lifecycle of the merkle generatorContext.
type bintrieGeneratorContext struct {
	root   common.Hash         // State root the snapshot is generated for
	marker []byte              // Resume position: a full 32-byte (stem || offset) key
	db     ethdb.KeyValueStore // Store holding both the trie nodes and the stem blobs
	batch  ethdb.Batch         // Batch collecting writes for an atomic flush
	logged time.Time           // Time at which progress was last logged
}
// newBintrieGeneratorContext builds a context for generating the snapshot of
// the given root, resuming at marker. Passing a nil or empty marker starts
// the walk from the beginning of the trie. A new write batch is allocated
// from db and the progress-log timestamp is set to the current time.
func newBintrieGeneratorContext(root common.Hash, marker []byte, db ethdb.KeyValueStore) *bintrieGeneratorContext {
	ctx := &bintrieGeneratorContext{
		root:   root,
		marker: marker,
		db:     db,
	}
	ctx.batch = db.NewBatch()
	ctx.logged = time.Now()
	return ctx
}
// close is the counterpart of generatorContext.close. It currently does
// nothing: the only iterator of the bintrie path is owned by
// generateBinTrieStems, so the context itself holds no long-lived resources.
// The method exists so that future resources have an obvious release point.
func (ctx *bintrieGeneratorContext) close() {}
// generateBinTrieStems regenerates the bintrie flat state by walking the
// bintrie rooted at ctx.root and emitting one stem blob per stem. Leaves are
// visited in stem-then-offset order, so the offsets of a stem are collected
// in a per-stem builder which is staged into ctx.batch whenever the stem
// changes, and once more after the last leaf.
//
// Progress checkpoints are only taken at stem boundaries, right after the
// finished stem has been staged into ctx.batch, and the checkpoint marker is
// the last leaf key (stem || offset) of that finished stem. A persisted
// marker therefore never runs ahead of the persisted stem data: whatever is
// at or before the marker is in the same (or an earlier) batch write.
// Checkpointing after every leaf would persist a marker inside a stem whose
// offsets still live only in the in-memory builder; an abort or crash at
// that point would lose them, because the resumed walk starts at the marker
// and never revisits the earlier offsets.
//
// On resume ctx.marker is passed to BinaryTrie.NodeIterator, which positions
// on the first leaf with key >= marker. The resumed walk re-reads the leaf
// at the marker and flushStem merges it with the blob already on disk, so
// the offsets written by the previous run are preserved.
//
// Range proofs are deliberately not used: the bintrie's Prove path is not
// implemented yet, and an iteration-only generation is acceptable for a
// one-time regeneration cost.
//
// Code chunks (offsets 128..255) are written into the same stem blobs as the
// account header and storage offsets, keeping the flat encoding symmetric
// with the trie.
//
// It returns errMissingTrie if the trie cannot be opened, an abort error if
// checkAndFlushBin observed an abort request, or any iterator, validation or
// merge error encountered during the walk.
func (g *generator) generateBinTrieStems(ctx *bintrieGeneratorContext) error {
	// bintrieDiskStore validates ctx.root against the root node persisted
	// on disk and then serves trie nodes by path.
	tr, err := bintrie.NewBinaryTrie(ctx.root, &bintrieDiskStore{db: ctx.db})
	if err != nil {
		log.Info("Bintrie missing, snapshotting paused", "state", ctx.root, "err", err)
		return errMissingTrie
	}
	it, err := tr.NodeIterator(ctx.marker)
	if err != nil {
		return err
	}
	var (
		// currentStem and lastKey are owned copies; the iterator's slices
		// are never aliased because they may be invalidated on Next.
		currentStem []byte
		lastKey     []byte // Most recent leaf key handed to the builder
		builder     = newStemBuilder()
	)
	// flushStem stages the stem being accumulated into ctx.batch using a
	// read-modify-write: the blob currently on disk (if any) is merged with
	// the builder's offsets, with the builder's values taking precedence.
	// The merge is what keeps offsets written by an earlier, interrupted run
	// when the resumed walk re-enters that stem at the marker.
	flushStem := func() error {
		if currentStem == nil || builder.empty() {
			return nil
		}
		existing := rawdb.ReadBinTrieStem(ctx.db, currentStem)
		writes := builder.toOffsetValues()
		merged, err := mergeStemBlob(existing, writes)
		if err != nil {
			return fmt.Errorf("merge stem %x failed: %w", currentStem, err)
		}
		if merged == nil {
			rawdb.DeleteBinTrieStem(ctx.batch, currentStem)
		} else {
			rawdb.WriteBinTrieStem(ctx.batch, currentStem, merged)
		}
		builder.reset()
		// Bookkeeping: count one stem per emitted blob.
		g.stats.accounts++
		return nil
	}
	for it.Next(true) {
		if !it.Leaf() {
			continue
		}
		key := it.LeafKey()
		val := it.LeafBlob()
		// A well-formed bintrie leaf is always (32-byte key, 32-byte value).
		// Surface a malformed trie as an error rather than corrupting the
		// flat state.
		if len(key) != bintrie.StemSize+1 {
			return fmt.Errorf("bintrie leaf key has len %d, want %d", len(key), bintrie.StemSize+1)
		}
		if len(val) != stemBlobValueSize {
			return fmt.Errorf("bintrie leaf value has len %d, want %d", len(val), stemBlobValueSize)
		}
		// Stem boundary: stage the finished stem, then offer a checkpoint.
		if currentStem != nil && !bytes.Equal(key[:bintrie.StemSize], currentStem) {
			if err := flushStem(); err != nil {
				return err
			}
			currentStem = nil
			// Everything up to lastKey is now staged in ctx.batch, so it is
			// safe to persist lastKey as the progress marker together with
			// the batch. checkAndFlushBin retains the slice, hence the copy.
			marker := make([]byte, len(lastKey))
			copy(marker, lastKey)
			if err := g.checkAndFlushBin(ctx, marker); err != nil {
				return err
			}
		}
		if currentStem == nil {
			currentStem = make([]byte, bintrie.StemSize)
			copy(currentStem, key[:bintrie.StemSize])
		}
		// builder.set takes an owning copy internally so it's safe to hand
		// it the iterator's transient value slice.
		builder.set(key[bintrie.StemSize], val)
		lastKey = append(lastKey[:0], key...)
		g.stats.slots++
		g.stats.storage += common.StorageSize(1 + bintrie.StemSize + len(val))
	}
	if err := it.Error(); err != nil {
		return err
	}
	// Stage the trailing stem (the loop only flushes on transitions). The
	// caller writes ctx.batch together with the completion marker.
	if err := flushStem(); err != nil {
		return err
	}
	return nil
}
// checkAndFlushBin is the bintrie analogue of checkAndFlush. It polls
// g.abort without blocking and, if an abort was requested or ctx.batch grew
// beyond ethdb.IdealBatchSize, journals current as the progress marker into
// the batch, writes the batch, resets it and publishes current as
// g.progress. current is retained, so the caller must pass an owned slice.
//
// Unlike the merkle variant there are no snapshot iterators to reopen: the
// bintrie path iterates the trie itself.
//
// Return values:
//   - nil when generation may continue;
//   - an abort error (see newAbortErr) when an abort request was received.
//     This also covers the case where the final batch write failed: the
//     request has already been taken off g.abort, so it must travel back to
//     the caller, otherwise nobody would ever acknowledge the pending stop
//     and both sides would block forever. The write failure is logged;
//   - the batch write error when no abort was pending.
func (g *generator) checkAndFlushBin(ctx *bintrieGeneratorContext, current []byte) error {
	var abort chan struct{}
	select {
	case abort = <-g.abort:
	default:
	}
	if ctx.batch.ValueSize() > ethdb.IdealBatchSize || abort != nil {
		if bytes.Compare(current, g.progress) < 0 {
			log.Error("Bintrie generator went backwards",
				"current", fmt.Sprintf("%x", current),
				"genMarker", fmt.Sprintf("%x", g.progress))
		}
		// Persist progress regardless of whether the batch is empty: it may
		// be that everything observed so far was already on disk.
		g.journalProgress(ctx.batch, current, g.stats)
		if err := ctx.batch.Write(); err != nil {
			if abort != nil {
				// Keep the stop handshake alive; progress stays at the
				// previously persisted marker.
				log.Error("Failed to flush bintrie batch", "err", err)
				return newAbortErr(abort)
			}
			return err
		}
		ctx.batch.Reset()
		g.lock.Lock()
		g.progress = current
		g.lock.Unlock()
		if abort != nil {
			g.stats.log("Aborting bintrie snapshot generation", ctx.root, g.progress)
			return newAbortErr(abort)
		}
	}
	if time.Since(ctx.logged) > 8*time.Second {
		g.stats.log("Generating bintrie snapshot", ctx.root, g.progress)
		ctx.logged = time.Now()
	}
	return nil
}
// generateBintrie is the bintrie analogue of the merkle `generate`
// background loop, following the same lifecycle and shutdown protocol so
// that `run` / `stop` behave identically for both schemes:
//
//  1. On a fresh run (empty g.progress) persist the snapshot root and the
//     initial progress marker, so recovery after an early crash finds them.
//  2. Drive generateBinTrieStems to completion, or until it fails.
//  3. On clean completion write the nil "done" marker, log a summary, clear
//     g.progress and close g.done.
//  4. In every case, finish by closing the abort channel supplied by stop().
//     If generation failed for a reason other than an abort request, the
//     failure is logged and the routine parks until stop() is called.
func (g *generator) generateBintrie(ctx *bintrieGeneratorContext) {
	g.stats.log("Resuming bintrie snapshot generation", ctx.root, g.progress)
	defer ctx.close()
	if len(g.progress) == 0 {
		batch := ctx.db.NewBatch()
		rawdb.WriteSnapshotRoot(batch, ctx.root)
		g.journalProgress(batch, g.progress, g.stats)
		if err := batch.Write(); err != nil {
			log.Crit("Failed to write initialized bintrie state marker", "err", err)
		}
	}
	var abort chan struct{}
	if err := g.generateBinTrieStems(ctx); err != nil {
		var aerr *abortErr
		if errors.As(err, &aerr) {
			abort = aerr.abort
		} else {
			// Internal failure: report it, since the generator otherwise
			// just sits idle until stop() and the cause would be invisible.
			log.Warn("Bintrie snapshot generation stalled", "root", ctx.root, "err", err)
		}
		// No abort request in hand yet: wait for stop() so its caller can
		// synchronize with this routine.
		if abort == nil {
			abort = <-g.abort
		}
		close(abort)
		return
	}
	// Successful completion: write the nil "done" marker so subsequent
	// loads know the snapshot is complete.
	g.journalProgress(ctx.batch, nil, g.stats)
	if err := ctx.batch.Write(); err != nil {
		log.Error("Failed to flush bintrie batch", "err", err)
		abort = <-g.abort
		close(abort)
		return
	}
	ctx.batch.Reset()
	log.Info("Generated bintrie snapshot",
		"stems", g.stats.accounts,
		"leaves", g.stats.slots,
		"storage", g.stats.storage,
		"elapsed", common.PrettyDuration(time.Since(g.stats.start)))
	g.lock.Lock()
	g.progress = nil
	g.lock.Unlock()
	close(g.done)
	// Block until the eventual stop() so the caller can wait for us.
	abort = <-g.abort
	close(abort)
}

View file

@ -0,0 +1,372 @@
// Copyright 2026 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package pathdb
import (
"bytes"
"testing"
"time"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/trie/bintrie"
"github.com/holiman/uint256"
)
// buildTestBintrie creates a small bintrie holding two accounts and one
// storage slot, commits it, writes the resulting nodes into db under the
// account-trie path keys and returns the state root together with a
// description of the accounts it inserted.
//
// The nodes are persisted by hand instead of going through triedb.Database:
// pathdb is a child package of triedb, so constructing a triedb.Database
// here would create an import cycle. The loop below mirrors what writeNodes
// would do in production.
func buildTestBintrie(t *testing.T, db ethdb.Database) (common.Hash, []addrAcct) {
	t.Helper()

	// The trie starts out empty, so it is opened at types.EmptyBinaryHash.
	tr, err := bintrie.NewBinaryTrie(types.EmptyBinaryHash, &diskStore{db: db})
	if err != nil {
		t.Fatalf("new bintrie: %v", err)
	}
	var (
		addr1     = common.HexToAddress("0x1111111111111111111111111111111111111111")
		addr2     = common.HexToAddress("0x2222222222222222222222222222222222222222")
		slot      = common.HexToHash("0x0000000000000000000000000000000000000000000000000000000000000007")
		slotValue = bytes.Repeat([]byte{0x77}, 32)
	)
	first := &types.StateAccount{
		Nonce:    1,
		Balance:  uint256.NewInt(100),
		CodeHash: types.EmptyCodeHash[:],
	}
	if err := tr.UpdateAccount(addr1, first, 0); err != nil {
		t.Fatalf("update account 1: %v", err)
	}
	second := &types.StateAccount{
		Nonce:    2,
		Balance:  uint256.NewInt(200),
		CodeHash: types.EmptyCodeHash[:],
	}
	if err := tr.UpdateAccount(addr2, second, 0); err != nil {
		t.Fatalf("update account 2: %v", err)
	}
	if err := tr.UpdateStorage(addr1, slot[:], slotValue); err != nil {
		t.Fatalf("update storage: %v", err)
	}
	root, nodes := tr.Commit(false)

	// All nodes go through the account-trie accessors: with EIP-7864 there
	// are no per-account storage tries, so the bintrie shares that key space.
	batch := db.NewBatch()
	for path, node := range nodes.Nodes {
		if !node.IsDeleted() {
			rawdb.WriteAccountTrieNode(batch, []byte(path), node.Blob)
		} else {
			rawdb.DeleteAccountTrieNode(batch, []byte(path))
		}
	}
	if err := batch.Write(); err != nil {
		t.Fatalf("flush trie nodes: %v", err)
	}
	accounts := []addrAcct{
		{addr: addr1, hasStorage: true, slot: slot, slotVal: slotValue},
		{addr: addr2, hasStorage: false},
	}
	return root, accounts
}
// addrAcct records what buildTestBintrie inserted for one account, so that
// assertions can re-derive the bintrie keys expected on disk.
type addrAcct struct {
	addr       common.Address // Account address
	hasStorage bool           // Whether a storage slot was written for the account
	slot       common.Hash    // Storage slot key (meaningful if hasStorage)
	slotVal    []byte         // Storage slot value (meaningful if hasStorage)
}
// runTestBintrieGenerator assembles a generator configured with the bintrie
// codec, runs generateBinTrieStems for the given root starting at marker, and
// writes the context's remaining batch to db. Any failure aborts the test.
// Nothing is returned; callers inspect the generated stem blobs through db.
func runTestBintrieGenerator(t *testing.T, db ethdb.Database, root common.Hash, marker []byte) {
	t.Helper()

	gen := &generator{
		db:    db,
		codec: newBintrieFlatCodec(db),
		stats: &generatorStats{start: time.Now()},
		abort: make(chan chan struct{}, 1),
		done:  make(chan struct{}),
	}
	ctx := newBintrieGeneratorContext(root, marker, db)
	defer ctx.close()

	err := gen.generateBinTrieStems(ctx)
	if err != nil {
		t.Fatalf("generateBinTrieStems: %v", err)
	}
	// generateBinTrieStems leaves its last writes staged in the batch.
	err = ctx.batch.Write()
	if err != nil {
		t.Fatalf("final batch write: %v", err)
	}
}
// TestBintrieGeneratorRebuildsStems covers the happy path of the generator:
// a small bintrie (two accounts, one storage slot) is built, the generator is
// run from scratch on its root, and the stem blobs are read back to confirm
// that every expected offset round-trips.
func TestBintrieGeneratorRebuildsStems(t *testing.T) {
	memdb := rawdb.NewMemoryDatabase()
	root, accts := buildTestBintrie(t, memdb)

	// Guard against a trivially empty trie.
	emptyRoot := root == (common.Hash{}) || root == types.EmptyBinaryHash
	if emptyRoot {
		t.Fatal("test bintrie produced an empty root")
	}
	runTestBintrieGenerator(t, memdb, root, nil)

	// Every account needs BasicData (offset 0) and CodeHash (offset 1) in
	// its header stem after generation.
	for i := range accts {
		acct := accts[i]
		headerStem := bintrie.GetBinaryTreeKeyBasicData(acct.addr)[:bintrie.StemSize]
		stemBlob := rawdb.ReadBinTrieStem(memdb, headerStem)
		if len(stemBlob) == 0 {
			t.Errorf("addr %x: stem blob missing after generation", acct.addr)
			continue
		}
		basic, err := extractStemOffset(stemBlob, bintrie.BasicDataLeafKey)
		if basicOK := err == nil && len(basic) == 32; !basicOK {
			t.Errorf("addr %x: BasicData missing/invalid (err=%v len=%d)", acct.addr, err, len(basic))
		}
		codeHash, err := extractStemOffset(stemBlob, bintrie.CodeHashLeafKey)
		if codeOK := err == nil && bytes.Equal(codeHash, types.EmptyCodeHash[:]); !codeOK {
			t.Errorf("addr %x: CodeHash mismatch (err=%v got=%x)", acct.addr, err, codeHash)
		}
	}

	// The storage slot lives at the stem derived from its own key. Slot 7 is
	// an in-header slot, so that stem is expected to coincide with the
	// account's BasicData stem.
	owner := accts[0]
	slotKey := bintrie.GetBinaryTreeKeyStorageSlot(owner.addr, owner.slot[:])
	slotStem, slotOffset := slotKey[:bintrie.StemSize], slotKey[bintrie.StemSize]

	slotBlob := rawdb.ReadBinTrieStem(memdb, slotStem)
	if len(slotBlob) == 0 {
		t.Fatal("storage stem blob missing")
	}
	got, err := extractStemOffset(slotBlob, slotOffset)
	if err != nil {
		t.Fatalf("extract storage offset: %v", err)
	}
	if !bytes.Equal(got, owner.slotVal) {
		t.Errorf("storage value mismatch: got %x want %x", got, owner.slotVal)
	}
}
// TestBintrieGeneratorResumeStemBoundary starts the generator from a marker
// sitting exactly on a stem boundary (stem || offset 0) and checks that only
// stems at or after the marker are generated.
func TestBintrieGeneratorResumeStemBoundary(t *testing.T) {
	memdb := rawdb.NewMemoryDatabase()
	root, accts := buildTestBintrie(t, memdb)

	stem1 := bintrie.GetBinaryTreeKeyBasicData(accts[0].addr)[:bintrie.StemSize]
	stem2 := bintrie.GetBinaryTreeKeyBasicData(accts[1].addr)[:bintrie.StemSize]

	// Order the two header stems lexicographically.
	smaller, larger := stem1, stem2
	if bytes.Compare(stem1, stem2) >= 0 {
		smaller, larger = stem2, stem1
	}
	// The marker is the larger stem followed by a zero offset byte.
	marker := make([]byte, 32)
	copy(marker, larger)

	runTestBintrieGenerator(t, memdb, root, marker)

	skipped := rawdb.ReadBinTrieStem(memdb, smaller)
	if len(skipped) != 0 {
		t.Errorf("smaller stem should have been skipped by resume marker, got %x", skipped)
	}
	generated := rawdb.ReadBinTrieStem(memdb, larger)
	if len(generated) == 0 {
		t.Errorf("larger stem should have been generated after resume marker")
	}
}
// TestBintrieGeneratorResumeMidStem is the regression test for resuming
// inside a stem. If flushStem simply overwrote the on-disk stem blob with
// the offsets collected after the resume point, offsets persisted by an
// earlier pass would be lost; flushStem must merge instead.
//
// The test simulates a two-pass generation:
//
//  1. Pre-seed the disk with a stem blob containing offsets 0 and 1, as if
//     an earlier pass had written them before being interrupted. The seeded
//     values are deliberately different from the trie's values.
//  2. Run the generator with marker = stem||1 (resume inside the stem, past
//     offset 0).
//  3. Verify that the resulting blob still has offset 0, that offset 1 now
//     carries the trie's value rather than the seed, and that the storage
//     offset walked after the marker is present.
func TestBintrieGeneratorResumeMidStem(t *testing.T) {
	db := rawdb.NewMemoryDatabase()
	root, accounts := buildTestBintrie(t, db)

	// accounts[0] is the account with storage: BasicData (offset 0),
	// CodeHash (offset 1) and storage slot 7 (a header slot).
	a := accounts[0]
	stem := bintrie.GetBinaryTreeKeyBasicData(a.addr)[:bintrie.StemSize]

	// Step 1: pre-seed a partial stem blob holding only offsets 0 and 1.
	var (
		seededBasic    = bytes.Repeat([]byte{0xAA}, 32)
		seededCodeHash = bytes.Repeat([]byte{0xBB}, 32)
	)
	preSeed := newStemBuilder()
	preSeed.set(bintrie.BasicDataLeafKey, seededBasic)
	preSeed.set(bintrie.CodeHashLeafKey, seededCodeHash)
	rawdb.WriteBinTrieStem(db, stem, preSeed.encode())

	// Step 2: resume at offset 1 of this stem. The walk collects offset 1
	// and the storage offset; the merge in flushStem has to keep offset 0
	// from the pre-seeded blob.
	marker := make([]byte, 32)
	copy(marker[:bintrie.StemSize], stem)
	marker[bintrie.StemSize] = bintrie.CodeHashLeafKey // resume at offset 1
	runTestBintrieGenerator(t, db, root, marker)

	// Step 3: inspect the resulting blob.
	blob := rawdb.ReadBinTrieStem(db, stem)
	if len(blob) == 0 {
		t.Fatal("stem blob missing after mid-stem resume")
	}
	// Offset 0 (BasicData) must survive the mid-stem resume.
	basic, err := extractStemOffset(blob, bintrie.BasicDataLeafKey)
	if err != nil {
		t.Fatalf("extract BasicData: %v", err)
	}
	if len(basic) != 32 {
		t.Fatalf("BasicData lost after mid-stem resume (A3 regression): got len=%d, want 32", len(basic))
	}
	// Offset 1 (CodeHash) sits at the marker, so the generator walked it
	// and the trie's value must have replaced the seeded one. A length
	// check alone would also accept the stale seed.
	code, err := extractStemOffset(blob, bintrie.CodeHashLeafKey)
	if err != nil {
		t.Fatalf("extract CodeHash: %v", err)
	}
	if len(code) != 32 {
		t.Fatalf("CodeHash missing after resume: got len=%d", len(code))
	}
	if bytes.Equal(code, seededCodeHash) {
		t.Errorf("CodeHash still holds the pre-seeded value %x; the trie value should have overwritten it", code)
	}
	// The storage slot was walked after the marker and must be present.
	storageKey := bintrie.GetBinaryTreeKeyStorageSlot(a.addr, a.slot[:])
	storageOffset := storageKey[bintrie.StemSize]
	storageStem := storageKey[:bintrie.StemSize]
	if bytes.Equal(storageStem, stem) {
		// Storage is at the same stem (header slot): verify it's in the blob.
		storageVal, err := extractStemOffset(blob, storageOffset)
		if err != nil {
			t.Fatalf("extract storage: %v", err)
		}
		if !bytes.Equal(storageVal, a.slotVal) {
			t.Errorf("storage value: got %x, want %x", storageVal, a.slotVal)
		}
	}
}
// TestBintrieGeneratorWithContractCode verifies that the generator writes
// code-chunk offsets (128..255) into the stem blob of a contract that has
// non-trivial code.
func TestBintrieGeneratorWithContractCode(t *testing.T) {
	db := rawdb.NewMemoryDatabase()

	// Build a bintrie with one contract that has 100 bytes of code. Per
	// EIP-7864, code is chunked into 31-byte pieces starting at offset 128
	// of the account's stem.
	tr, err := bintrie.NewBinaryTrie(types.EmptyBinaryHash, &bintrieDiskStore{db: db})
	if err != nil {
		t.Fatalf("new bintrie: %v", err)
	}
	// The address literal must be valid hex so the test exercises the
	// address written here rather than whatever an undecodable string
	// falls back to.
	addr := common.HexToAddress("0x3333333333333333333333333333333333333333")
	code := make([]byte, 100)
	for i := range code {
		code[i] = byte(i)
	}
	// NOTE(review): the code hash stored here is EmptyCodeHash although the
	// code is non-empty. The assertions below only look at leaf presence,
	// so the mismatch does not affect this test.
	if err := tr.UpdateAccount(addr, &types.StateAccount{
		Nonce:    1,
		Balance:  uint256.NewInt(1000),
		CodeHash: types.EmptyCodeHash[:],
	}, len(code)); err != nil {
		t.Fatalf("UpdateAccount: %v", err)
	}
	codeHash := common.BytesToHash(types.EmptyCodeHash[:])
	if err := tr.UpdateContractCode(addr, codeHash, code); err != nil {
		t.Fatalf("UpdateContractCode: %v", err)
	}
	root, nodes := tr.Commit(false)

	// Persist trie nodes.
	batch := db.NewBatch()
	for path, node := range nodes.Nodes {
		if !node.IsDeleted() {
			rawdb.WriteAccountTrieNode(batch, []byte(path), node.Blob)
		}
	}
	if err := batch.Write(); err != nil {
		t.Fatalf("flush trie nodes: %v", err)
	}
	// Run the generator.
	runTestBintrieGenerator(t, db, root, nil)

	// Verify account header offsets are present.
	stem := bintrie.GetBinaryTreeKeyBasicData(addr)[:bintrie.StemSize]
	blob := rawdb.ReadBinTrieStem(db, stem)
	if len(blob) == 0 {
		t.Fatal("stem blob missing for contract account")
	}
	basic, err := extractStemOffset(blob, bintrie.BasicDataLeafKey)
	if err != nil {
		t.Errorf("BasicData: extract failed: %v", err)
	}
	if len(basic) != 32 {
		t.Errorf("BasicData: got len %d, want 32", len(basic))
	}
	codeHashLeaf, err := extractStemOffset(blob, bintrie.CodeHashLeafKey)
	if err != nil {
		t.Errorf("CodeHash: extract failed: %v", err)
	}
	if len(codeHashLeaf) != 32 {
		t.Errorf("CodeHash: got len %d, want 32", len(codeHashLeaf))
	}
	// Verify at least the first code chunk offset (128) is present.
	// 100 bytes of code = ceil(100/31) = 4 chunks, at offsets 128..131.
	codeChunk0, err := extractStemOffset(blob, 128)
	if err != nil {
		t.Errorf("Code chunk at offset 128: extract failed: %v", err)
	}
	if len(codeChunk0) != 32 {
		t.Errorf("Code chunk at offset 128: got len %d, want 32 (code chunk missing from stem blob)", len(codeChunk0))
	}
}

View file

@ -137,7 +137,7 @@ func TestAccountIteratorBasics(t *testing.T) {
db := rawdb.NewMemoryDatabase()
batch := db.NewBatch()
states.write(batch, nil, nil)
states.write(batch, &merkleFlatCodec{}, nil, nil)
batch.Write()
it = newDiskAccountIterator(db, common.Hash{})
verifyIterator(t, 100, it, verifyNothing) // Nil is allowed for single layer iterator
@ -176,7 +176,7 @@ func TestStorageIteratorBasics(t *testing.T) {
db := rawdb.NewMemoryDatabase()
batch := db.NewBatch()
states.write(batch, nil, nil)
states.write(batch, &merkleFlatCodec{}, nil, nil)
batch.Write()
for account := range accounts {
it := newDiskStorageIterator(db, account, common.Hash{})

View file

@ -50,7 +50,11 @@ var (
// - Version 1: storage.Incomplete field is removed
// - Version 2: add post-modification state values
// - Version 3: a flag has been added to indicate whether the storage slot key is the raw key or a hash
const journalVersion uint64 = 3
// - Version 4: bintrie flat-state per-stem layout. The journalGenerator
// struct gains an IsBintrie flag (rlp:"optional", defaults to
// false) so the loader can discard journals from a mismatched
// scheme and trigger a full flat-state regeneration.
const journalVersion uint64 = 4
// loadJournal tries to parse the layer journal from the disk.
func (db *Database) loadJournal(diskRoot common.Hash) (layer, error) {
@ -119,10 +123,27 @@ type journalGenerator struct {
Accounts uint64
Slots uint64
Storage uint64
// IsBintrie distinguishes a bintrie generator's progress marker from a
// merkle one. The two markers have incompatible semantics (single-tier
// 32-byte stem||offset vs. two-tier accountHash+storageHash) and the
// loader discards the journal whenever this flag does not match the
// database's mode, forcing a full regeneration.
//
// Marshalled with rlp:"optional" so older v3 journals (which never
// wrote this field) decode cleanly to false — the merkle default.
IsBintrie bool `rlp:"optional"`
}
// loadGenerator loads the state generation progress marker from the database.
func loadGenerator(db ethdb.KeyValueReader, hash nodeHasher) (*journalGenerator, common.Hash, error) {
//
// isBintrie indicates the database's active scheme. A persisted generator
// from the *other* scheme is discarded outright (and a fresh marker is
// returned) because the marker shapes are mutually unintelligible: a
// merkle marker is two-tier accountHash+storageHash, while a bintrie
// marker is a single 32-byte stem||offset key. Resuming with the wrong
// shape would either skip large stretches of the trie or revisit them.
func loadGenerator(db ethdb.KeyValueReader, hash nodeHasher, isBintrie bool) (*journalGenerator, common.Hash, error) {
trieRoot, err := hash(rawdb.ReadAccountTrieNode(db, nil))
if err != nil {
return nil, common.Hash{}, err
@ -139,6 +160,15 @@ func loadGenerator(db ethdb.KeyValueReader, hash nodeHasher) (*journalGenerator,
log.Info("State snapshot generator is not compatible")
return nil, trieRoot, nil
}
// Scheme mismatch — drop the journal and force a full regeneration.
// IsBintrie defaults to false on legacy v3 entries (the field is
// rlp:"optional"), which is exactly the right answer for a merkle
// database opened against an old journal.
if generator.IsBintrie != isBintrie {
log.Info("State snapshot generator is for a different scheme, discarding",
"journalIsBintrie", generator.IsBintrie, "dbIsBintrie", isBintrie)
return nil, trieRoot, nil
}
// The state snapshot is inconsistent with the trie data and must
// be rebuilt.
//

View file

@ -51,6 +51,26 @@ func (loc nodeLoc) string() string {
return fmt.Sprintf("loc: %s, depth: %d", loc.loc, loc.depth)
}
// RawStateReader is an extension of database.StateReader that exposes raw
// byte access to flat-state entries without applying any scheme-specific
// decoding (slim-RLP for merkle, no-op for bintrie). The bintrie state
// reader in core/state uses it to fetch the BasicData and CodeHash leaves
// for an account separately and reconstruct a slim account locally.
//
// The merkle pathdb reader implements this interface trivially because
// it already has AccountRLP. Callers should type-assert before using it
// rather than relying on the database.StateReader interface unconditionally,
// since not every StateReader is expected to provide raw access.
type RawStateReader interface {
// Embedding keeps the regular, decoded state accessors available on
// any value that also offers raw access.
database.StateReader
// AccountRLP returns the raw flat-state entry stored under the given
// lookup key. Semantics depend on the active codec:
// - merkle: slim-RLP-encoded account bytes
// - bintrie: 32-byte leaf value at the (stem || offset) tuple
//
// Despite the method name, the result is RLP only under the merkle
// codec; under bintrie it is the undecoded leaf value.
//
// Returns nil if the entry is not present.
AccountRLP(hash common.Hash) ([]byte, error)
}
// reader implements the database.NodeReader interface, providing the functionalities to
// retrieve trie nodes by wrapping the internal state layer.
type reader struct {
@ -260,7 +280,7 @@ func (r *HistoricalStateReader) AccountRLP(address common.Address) ([]byte, erro
// and try to define a low granularity lock if the current approach doesn't
// work later.
dl := r.db.tree.bottom()
hash := crypto.Keccak256Hash(address.Bytes())
hash := r.db.flatCodec.AccountKey(address)
latest, err := dl.account(hash, 0)
if err != nil {
return nil, err
@ -310,8 +330,7 @@ func (r *HistoricalStateReader) Storage(address common.Address, key common.Hash)
// and try to define a low granularity lock if the current approach doesn't
// work later.
dl := r.db.tree.bottom()
addrHash := crypto.Keccak256Hash(address.Bytes())
keyHash := crypto.Keccak256Hash(key.Bytes())
addrHash, keyHash := r.db.flatCodec.StorageKey(address, key)
latest, err := dl.storage(addrHash, keyHash, 0)
if err != nil {
return nil, err

View file

@ -25,7 +25,6 @@ import (
"github.com/VictoriaMetrics/fastcache"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/metrics"
@ -424,8 +423,8 @@ func (s *stateSet) decode(r *rlp.Stream) error {
}
// write flushes state mutations into the provided database batch as a whole.
func (s *stateSet) write(batch ethdb.Batch, genMarker []byte, clean *fastcache.Cache) (int, int) {
return writeStates(batch, genMarker, s.accountData, s.storageData, clean)
func (s *stateSet) write(batch ethdb.Batch, codec flatStateCodec, genMarker []byte, clean *fastcache.Cache) (int, int, error) {
return writeStates(batch, codec, genMarker, s.accountData, s.storageData, clean)
}
// reset clears all cached state data, including any optional sorted lists that
@ -438,11 +437,13 @@ func (s *stateSet) reset() {
s.storageListSorted = make(map[common.Hash][]common.Hash)
}
// dbsize returns the approximate size for db write.
func (s *stateSet) dbsize() int {
m := len(s.accountData) * len(rawdb.SnapshotAccountPrefix)
// dbsize returns the approximate size for db write. The codec supplies
// the per-entry on-disk overhead so this calculation tracks the actual
// schema in use (merkle vs. bintrie).
func (s *stateSet) dbsize(codec flatStateCodec) int {
m := len(s.accountData) * codec.AccountPrefixSize()
for _, slots := range s.storageData {
m += len(slots) * len(rawdb.SnapshotStoragePrefix)
m += len(slots) * codec.StoragePrefixSize()
}
return m + int(s.size)
}

353
triedb/pathdb/stem_blob.go Normal file
View file

@ -0,0 +1,353 @@
// Copyright 2026 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package pathdb
import (
"errors"
"fmt"
"math/bits"
"github.com/ethereum/go-ethereum/common"
)
// Bintrie stem blob layout
// ------------------------
//
// The flat-state representation of a bintrie stem packs the populated
// (offset, 32-byte value) pairs at that stem into a single on-disk blob.
// A stem holds up to 256 offsets (per EIP-7864, the full "stem group"),
// but in practice only a handful are populated for any given account
// (BasicData at offset 0, CodeHash at offset 1, a few storage slots, or
// code chunks). A dense encoding would waste 8 KB per stem; this layout
// scales linearly with the number of populated offsets.
//
// Layout:
//
// [ 0 .. 31 ] 32-byte bitmap; bit i set iff offset i has a value
// [32 .. 63 ] first populated offset's 32-byte value
// [64 .. 95 ] second populated offset's 32-byte value
// ...
// [32 + 32*(N-1) .. 32 + 32*N - 1] N-th populated offset's value
//
// where N = popcount(bitmap). Values appear in increasing offset order,
// which is the order obtained by scanning the bitmap from the lowest
// byte index to the highest (byte 0 first, then byte 1, etc.), and
// within each byte from MSB (offset b*8) to LSB (offset b*8+7).
//
// An "absent" offset is one whose bitmap bit is clear; an offset whose
// value is 32 zero bytes is "present with zero value" — that is the
// tombstone convention used by BinaryTrie.DeleteStorage, which writes
// 32 zero bytes to mark a slot as cleared without removing it from the
// underlying StemNode's Values slice.
//
// An empty stem (all bits clear) is represented by a zero-length blob,
// and callers must delete the on-disk key rather than write a zero-length
// value.
// Sizing constants for the stem blob wire format: a fixed-size presence
// bitmap followed by one fixed-size value per set bit.
const (
stemBlobBitmapSize = 32 // length of the leading presence bitmap, in bytes
stemBlobBitmapBits = stemBlobBitmapSize * 8 // 256: one presence bit per stem offset
stemBlobValueSize = common.HashLength // 32: length of each stored value, in bytes
)
// stemOffsetMax is the highest valid offset within a bintrie stem.
const stemOffsetMax = stemBlobBitmapBits - 1 // 255
// Errors reported when a stem blob fails structural validation.
var (
// errStemBlobTooShort: a non-empty blob is shorter than the bitmap.
errStemBlobTooShort = errors.New("stem blob shorter than bitmap")
// errStemBlobMalformed: the number or size of the values disagrees
// with what the bitmap announces.
errStemBlobMalformed = errors.New("stem blob length does not match bitmap popcount")
// errStemBlobValueOutOfRange: the bitmap marks an offset present but
// its value slot extends past the end of the blob.
errStemBlobValueOutOfRange = errors.New("stem blob value slice out of range")
)
// encodeStemBlob serialises a presence bitmap together with its values
// into the stem blob wire format: the 32-byte bitmap followed by the
// values back to back.
//
// values must hold exactly one 32-byte entry per set bitmap bit, listed
// in ascending offset order. Any disagreement between the bitmap and the
// values (wrong count or wrong entry length) is reported as an error
// wrapping errStemBlobMalformed. A bitmap with no bit set yields
// (nil, nil), which tells the caller to delete the on-disk key.
func encodeStemBlob(bitmap [stemBlobBitmapSize]byte, values [][]byte) ([]byte, error) {
	populated := bitmapPopcount(bitmap)
	switch {
	case populated != len(values):
		return nil, fmt.Errorf("stem blob popcount=%d values=%d: %w", populated, len(values), errStemBlobMalformed)
	case populated > stemBlobBitmapBits:
		return nil, fmt.Errorf("stem blob value count %d exceeds max %d: %w", populated, stemBlobBitmapBits, errStemBlobMalformed)
	case populated == 0:
		return nil, nil
	}
	// Reserve the final size up front, then append the bitmap and each
	// value in turn so the payload grows in offset order.
	blob := make([]byte, 0, stemBlobBitmapSize+populated*stemBlobValueSize)
	blob = append(blob, bitmap[:]...)
	for idx, val := range values {
		if len(val) != stemBlobValueSize {
			return nil, fmt.Errorf("stem blob value %d has len %d: %w", idx, len(val), errStemBlobMalformed)
		}
		blob = append(blob, val...)
	}
	return blob, nil
}
// decodeStemBlob splits a raw stem blob into its presence bitmap and the
// populated 32-byte values, listed in ascending offset order.
//
// The returned values are sub-slices of blob, not copies: callers must
// copy them before retaining or mutating them.
//
// A nil or zero-length blob decodes to a zero bitmap with no values. A
// non-empty blob shorter than the bitmap yields errStemBlobTooShort, and a
// blob whose length does not equal bitmap + 32*popcount yields an error
// wrapping errStemBlobMalformed.
func decodeStemBlob(blob []byte) ([stemBlobBitmapSize]byte, [][]byte, error) {
	var bitmap [stemBlobBitmapSize]byte
	switch size := len(blob); {
	case size == 0:
		return bitmap, nil, nil
	case size < stemBlobBitmapSize:
		return bitmap, nil, errStemBlobTooShort
	}
	// The blob is at least bitmap-sized here, so this fills the bitmap fully.
	copy(bitmap[:], blob)

	populated := bitmapPopcount(bitmap)
	if want := stemBlobBitmapSize + populated*stemBlobValueSize; len(blob) != want {
		return bitmap, nil, fmt.Errorf("stem blob len=%d popcount=%d expected=%d: %w", len(blob), populated, want, errStemBlobMalformed)
	}
	if populated == 0 {
		return bitmap, nil, nil
	}
	// The length check above guarantees the payload is a whole number of
	// values, so it can be consumed one value-sized window at a time.
	values := make([][]byte, 0, populated)
	for payload := blob[stemBlobBitmapSize:]; len(payload) > 0; payload = payload[stemBlobValueSize:] {
		values = append(values, payload[:stemBlobValueSize])
	}
	return bitmap, values, nil
}
// extractStemOffset looks up a single offset inside a stem blob and
// returns its 32-byte value. The result is a sub-slice of blob and must
// not be mutated.
//
// An offset that is not present in a well-formed blob (including a nil or
// empty blob) is reported as (nil, nil), not as an error. Errors are
// reserved for structural problems: errStemBlobTooShort when the blob
// cannot hold the bitmap, errStemBlobValueOutOfRange when the bitmap marks
// the offset present but the blob ends before its value does.
func extractStemOffset(blob []byte, offset byte) ([]byte, error) {
	switch {
	case len(blob) == 0:
		return nil, nil
	case len(blob) < stemBlobBitmapSize:
		return nil, errStemBlobTooShort
	}
	var bitmap [stemBlobBitmapSize]byte
	copy(bitmap[:], blob)
	if !bitmapGet(bitmap, offset) {
		return nil, nil
	}
	// Values are stored densely, so the slot index of this offset is the
	// number of populated offsets that precede it.
	lo := stemBlobBitmapSize + bitmapRank(bitmap, offset)*stemBlobValueSize
	hi := lo + stemBlobValueSize
	if hi > len(blob) {
		return nil, errStemBlobValueOutOfRange
	}
	return blob[lo:hi], nil
}
// stemBuilder accumulates (offset, value) pairs and produces a stem blob.
// It supports loading an existing blob, setting individual offsets, and
// emitting the final encoded form.
//
// Setting a value of nil or an empty slice clears the corresponding bit
// from the bitmap (the offset becomes "absent"). Setting a non-nil
// 32-byte slice — including 32 zero bytes — marks the offset present
// with that value. This preserves the distinction between absent and
// tombstoned-with-zero used elsewhere in the bintrie code.
//
// Invariant: a bitmap bit is set exactly when the matching values entry
// is non-nil. set and loadFromBlob update both fields together; encode
// sizes its output from the bitmap but copies from values, so code that
// touches the fields directly must keep them in sync.
//
// A stemBuilder is not safe for concurrent use.
type stemBuilder struct {
// bitmap has one presence bit per offset; within each byte the most
// significant bit belongs to the lowest offset.
bitmap [stemBlobBitmapSize]byte
// values stores the current value at each offset, or nil if absent.
// Using a fixed 256-entry array avoids allocation churn as offsets
// are set and cleared.
values [stemBlobBitmapBits][]byte
}
// newStemBuilder allocates a stemBuilder with no offsets populated; the
// zero value of the struct is already a valid empty builder.
func newStemBuilder() *stemBuilder {
	return new(stemBuilder)
}
// loadFromBlob decodes blob and merges every populated offset it contains
// into the builder. Offsets already present in the builder are
// overwritten; offsets the blob does not mention are left untouched. An
// empty blob is a no-op.
//
// If the blob fails to decode the error is returned and the builder is
// not modified.
func (b *stemBuilder) loadFromBlob(blob []byte) error {
	if len(blob) == 0 {
		return nil
	}
	bitmap, values, err := decodeStemBlob(blob)
	if err != nil {
		return err
	}
	// values is dense (one entry per set bit, ascending), so a running
	// cursor pairs each set bit with its value.
	next := 0
	for offset := 0; offset < stemBlobBitmapBits; offset++ {
		if bitmapGet(bitmap, byte(offset)) {
			// The decoded values alias blob; store a private copy so the
			// builder stays valid if the caller reuses or mutates blob.
			b.values[offset] = append(make([]byte, 0, stemBlobValueSize), values[next]...)
			b.bitmap[offset>>3] |= 0x80 >> uint(offset&7)
			next++
		}
	}
	return nil
}
// set stores value at offset, or clears the offset when value is nil or
// empty. A 32-byte value (all-zero included) marks the offset present;
// the bytes are copied, so the caller keeps ownership of value.
//
// Any other length is a programming error and panics.
func (b *stemBuilder) set(offset byte, value []byte) {
	// Locate the presence bit: byte offset/8, MSB-first within the byte.
	slot, bit := offset>>3, byte(0x80)>>(offset&7)
	switch len(value) {
	case 0:
		b.values[offset] = nil
		b.bitmap[slot] &^= bit
	case stemBlobValueSize:
		b.values[offset] = append(make([]byte, 0, stemBlobValueSize), value...)
		b.bitmap[slot] |= bit
	default:
		panic(fmt.Sprintf("stemBuilder: value at offset %d has len %d, want %d", offset, len(value), stemBlobValueSize))
	}
}
// empty reports whether the builder holds no populated offsets, i.e.
// whether every bit of its presence bitmap is clear.
func (b *stemBuilder) empty() bool {
	return b.bitmap == [stemBlobBitmapSize]byte{}
}
// encode serialises the builder's current contents into a stem blob: the
// bitmap followed by the populated values in ascending offset order.
//
// An empty builder encodes to nil so that the caller can delete the
// on-disk key instead of writing a zero-length value.
func (b *stemBuilder) encode() []byte {
	populated := bitmapPopcount(b.bitmap)
	if populated == 0 {
		return nil
	}
	blob := make([]byte, stemBlobBitmapSize+populated*stemBlobValueSize)
	// copy reports the bitmap length, which is where the first value goes.
	cursor := copy(blob, b.bitmap[:])
	for _, val := range b.values[:] {
		if val == nil {
			continue
		}
		copy(blob[cursor:], val)
		cursor += stemBlobValueSize
	}
	return blob
}
// reset returns the builder to its freshly-constructed state, dropping
// the presence bitmap and every stored value.
func (b *stemBuilder) reset() {
	*b = stemBuilder{}
}
// toOffsetValues lists the builder's populated entries as (offset, value)
// pairs in ascending offset order, in the shape mergeStemBlob accepts.
//
// Offsets without a value are omitted rather than emitted as clears:
// leaving an offset out of a merge keeps whatever the existing blob holds
// there, which is what the generator's read-modify-write pattern needs
// when the builder carries only the new writes.
//
// The returned values share storage with the builder. An empty builder
// yields nil.
func (b *stemBuilder) toOffsetValues() []stemOffsetValue {
	populated := bitmapPopcount(b.bitmap)
	if populated == 0 {
		return nil
	}
	pairs := make([]stemOffsetValue, 0, populated)
	for offset, val := range b.values[:] {
		if val == nil {
			continue
		}
		pairs = append(pairs, stemOffsetValue{Offset: byte(offset), Value: val})
	}
	return pairs
}
// stemOffsetValue is a single (offset, value) pair passed to mergeStemBlob.
// A nil Value clears the offset.
type stemOffsetValue struct {
// Offset is the position within the stem (0..255).
Offset byte
// Value is the 32-byte value to store at Offset. A nil or zero-length
// Value clears the offset instead; any other length is rejected.
Value []byte
}
// mergeStemBlob performs a read-modify-write on a stem blob: it decodes
// the existing blob (if any), applies the given writes in order, and
// returns a freshly encoded blob. Returns (nil, nil) when the result is
// empty — the caller should delete the on-disk key in that case.
//
// A write with a nil or empty Value clears its offset; a write with a
// 32-byte Value sets it. Offsets not mentioned in writes keep the value
// from existing.
//
// An error is returned if existing cannot be decoded, or if any write
// carries a Value whose length is neither 0 nor 32. The latter is reported
// as an error wrapping errStemBlobMalformed rather than being allowed to
// panic inside the builder, since this function already has an error
// channel for malformed input.
func mergeStemBlob(existing []byte, writes []stemOffsetValue) ([]byte, error) {
	b := newStemBuilder()
	if err := b.loadFromBlob(existing); err != nil {
		return nil, err
	}
	for _, w := range writes {
		// Reject bad lengths here: stemBuilder.set treats them as a
		// programming error and panics.
		if n := len(w.Value); n != 0 && n != stemBlobValueSize {
			return nil, fmt.Errorf("stem blob write at offset %d has len %d: %w", w.Offset, n, errStemBlobMalformed)
		}
		b.set(w.Offset, w.Value)
	}
	return b.encode(), nil
}
// bitmapPopcount counts the set bits across the whole 32-byte bitmap,
// i.e. the number of offsets the bitmap marks as present.
func bitmapPopcount(bitmap [stemBlobBitmapSize]byte) int {
	total := 0
	for i := 0; i < len(bitmap); i++ {
		total += bits.OnesCount8(bitmap[i])
	}
	return total
}
// bitmapGet reports whether the presence bit for offset is set. The bit
// for a given offset lives in byte offset/8; within that byte the most
// significant bit belongs to the lowest offset (offset%8 == 0) and the
// least significant bit to the highest (offset%8 == 7).
func bitmapGet(bitmap [stemBlobBitmapSize]byte, offset byte) bool {
	mask := byte(0x80) >> (offset & 7)
	return bitmap[offset>>3]&mask != 0
}
// bitmapRank counts the populated offsets that are strictly lower than
// offset; the bit at offset itself is never included. For a populated
// offset this is its index among the densely stored values.
func bitmapRank(bitmap [stemBlobBitmapSize]byte, offset byte) int {
	whole, partial := int(offset>>3), offset&7

	// Every bit in the bytes before the target byte precedes the offset.
	preceding := 0
	for _, b := range bitmap[:whole] {
		preceding += bits.OnesCount8(b)
	}
	// Inside the target byte lower offsets sit in the higher bits, so the
	// preceding ones are the top `partial` bits. Shifting the rest away
	// leaves exactly those to count.
	if partial != 0 {
		preceding += bits.OnesCount8(bitmap[whole] >> (8 - partial))
	}
	return preceding
}

View file

@ -0,0 +1,143 @@
// Copyright 2026 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package pathdb
import (
"bytes"
"testing"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/trie/bintrie"
)
// TestStemBlobOffset127_128Boundary tests the bitmap byte boundary
// between offset 127 (last header storage slot) and offset 128
// (first code chunk). Off-by-one in bitmapRank at this boundary
// would cause extractStemOffset to return the wrong value.
func TestStemBlobOffset127_128Boundary(t *testing.T) {
b := newStemBuilder()
val127 := bytes.Repeat([]byte{0x7F}, stemBlobValueSize)
val128 := bytes.Repeat([]byte{0x80}, stemBlobValueSize)
b.set(127, val127)
b.set(128, val128)
blob := b.encode()
if blob == nil {
t.Fatal("encode returned nil for 2-offset builder")
}
got127, err := extractStemOffset(blob, 127)
if err != nil {
t.Fatalf("extract offset 127: %v", err)
}
if !bytes.Equal(got127, val127) {
t.Errorf("offset 127: got %x, want %x", got127, val127)
}
got128, err := extractStemOffset(blob, 128)
if err != nil {
t.Fatalf("extract offset 128: %v", err)
}
if !bytes.Equal(got128, val128) {
t.Errorf("offset 128: got %x, want %x", got128, val128)
}
// Verify bitmapRank correctness at the byte boundary.
var bitmap [stemBlobBitmapSize]byte
copy(bitmap[:], blob[:stemBlobBitmapSize])
if r := bitmapRank(bitmap, 127); r != 0 {
t.Errorf("bitmapRank(127) = %d, want 0", r)
}
if r := bitmapRank(bitmap, 128); r != 1 {
t.Errorf("bitmapRank(128) = %d, want 1", r)
}
}
// TestStemBlobFull256DeleteMiddle tests a fully-populated stem (all 256
// offsets) where one offset in the middle is deleted. After the delete
// the blob must shrink by exactly one value, the deleted offset must read
// as absent, and its neighbours plus both ends of the stem must still
// return their own values.
func TestStemBlobFull256DeleteMiddle(t *testing.T) {
	b := newStemBuilder()
	for i := range 256 {
		val := bytes.Repeat([]byte{byte(i)}, stemBlobValueSize)
		b.set(byte(i), val)
	}
	if bitmapPopcount(b.bitmap) != 256 {
		t.Fatalf("full builder has popcount %d, want 256", bitmapPopcount(b.bitmap))
	}

	b.set(128, nil) // delete the middle
	if bitmapPopcount(b.bitmap) != 255 {
		t.Fatalf("after delete: popcount %d, want 255", bitmapPopcount(b.bitmap))
	}

	blob := b.encode()
	expectedSize := stemBlobBitmapSize + 255*stemBlobValueSize
	if len(blob) != expectedSize {
		t.Fatalf("blob size %d, want %d", len(blob), expectedSize)
	}

	// mustExtract fails the test on a malformed blob, so a decoding error
	// cannot masquerade as an "absent" or "corrupted" offset below.
	mustExtract := func(offset byte) []byte {
		t.Helper()
		got, err := extractStemOffset(blob, offset)
		if err != nil {
			t.Fatalf("extract offset %d: %v", offset, err)
		}
		return got
	}

	if got128 := mustExtract(128); got128 != nil {
		t.Errorf("offset 128 should be absent, got %x", got128)
	}
	if got127 := mustExtract(127); !bytes.Equal(got127, bytes.Repeat([]byte{127}, stemBlobValueSize)) {
		t.Errorf("offset 127 corrupted after deleting 128")
	}
	if got129 := mustExtract(129); !bytes.Equal(got129, bytes.Repeat([]byte{129}, stemBlobValueSize)) {
		t.Errorf("offset 129 corrupted after deleting 128")
	}
	if got0 := mustExtract(0); !bytes.Equal(got0, bytes.Repeat([]byte{0}, stemBlobValueSize)) {
		t.Errorf("offset 0 corrupted")
	}
	if got255 := mustExtract(255); !bytes.Equal(got255, bytes.Repeat([]byte{255}, stemBlobValueSize)) {
		t.Errorf("offset 255 corrupted")
	}
}
// TestFlushIdempotency verifies that flushing the same data twice
// produces an identical on-disk blob.
//
// The first flush must actually leave a blob on disk; otherwise two empty
// reads would compare equal and the test would pass without exercising
// anything.
func TestFlushIdempotency(t *testing.T) {
	codec, db := newTestBintrieCodec(t)

	stem := bytes.Repeat([]byte{0x55}, bintrie.StemSize)
	// mkKey builds the flat-state key stem||offset for this test's stem.
	mkKey := func(offset byte) common.Hash {
		var k common.Hash
		copy(k[:bintrie.StemSize], stem)
		k[bintrie.StemSize] = offset
		return k
	}
	val := bytes.Repeat([]byte{0xAA}, stemBlobValueSize)

	// flushOnce writes the single (offset 5 -> val) entry through the
	// codec, commits the batch and returns the stem blob now on disk.
	// NOTE(review): anything Flush returns is discarded, as in the
	// original test; confirm whether it reports an error worth asserting.
	flushOnce := func() []byte {
		batch := db.NewBatch()
		codec.Flush(batch, nil, map[common.Hash][]byte{mkKey(5): val}, nil, nil)
		flushBatch(t, batch)
		return rawdb.ReadBinTrieStem(db, stem)
	}

	blob1 := flushOnce()
	if len(blob1) == 0 {
		t.Fatal("stem blob missing after first flush")
	}
	blob2 := flushOnce()

	if !bytes.Equal(blob1, blob2) {
		t.Errorf("Flush is not idempotent: blob1 len=%d blob2 len=%d", len(blob1), len(blob2))
	}
}

View file

@ -0,0 +1,361 @@
// Copyright 2026 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package pathdb
import (
"bytes"
"testing"
)
// mkval constructs a 32-byte value where the first byte is tag and the
// rest are zero. Used to make test assertions easy to read.
func mkval(tag byte) []byte {
v := make([]byte, stemBlobValueSize)
v[0] = tag
return v
}
// TestStemBlobEmpty covers the empty case in both directions: a builder
// with no entries must encode to nil (the signal for callers to delete
// the key), and decoding either a nil or a zero-length blob must yield a
// zero bitmap with no values.
func TestStemBlobEmpty(t *testing.T) {
	builder := newStemBuilder()
	if !builder.empty() {
		t.Fatal("fresh builder should be empty")
	}
	if encoded := builder.encode(); encoded != nil {
		t.Fatalf("empty builder should encode to nil, got %x", encoded)
	}

	// nil and the empty slice are distinct inputs that must decode alike.
	inputs := [][]byte{nil, {}}
	for _, input := range inputs {
		bitmap, values, err := decodeStemBlob(input)
		switch {
		case err != nil:
			t.Fatalf("decode empty: %v", err)
		case values != nil:
			t.Fatalf("decode empty values: got %v, want nil", values)
		}
		for idx := 0; idx < len(bitmap); idx++ {
			if bitmap[idx] != 0 {
				t.Fatalf("decode empty bitmap byte %d: got 0x%02x, want 0", idx, bitmap[idx])
			}
		}
	}
}
// TestStemBlobBasicDataAndCodeHash verifies the "account header" encoding
// pattern: offsets 0 and 1 populated. This is the common case for every
// account update.
func TestStemBlobBasicDataAndCodeHash(t *testing.T) {
b := newStemBuilder()
basicData := mkval(0xAA)
codeHash := mkval(0xBB)
b.set(0, basicData)
b.set(1, codeHash)
if b.empty() {
t.Fatal("builder should not be empty after two sets")
}
blob := b.encode()
if blob == nil {
t.Fatal("encode should not return nil for populated builder")
}
if got, want := len(blob), stemBlobBitmapSize+2*stemBlobValueSize; got != want {
t.Fatalf("blob length: got %d, want %d", got, want)
}
// Roundtrip through decodeStemBlob.
bitmap, values, err := decodeStemBlob(blob)
if err != nil {
t.Fatalf("decode: %v", err)
}
if got := bitmapPopcount(bitmap); got != 2 {
t.Fatalf("popcount: got %d, want 2", got)
}
if !bitmapGet(bitmap, 0) || !bitmapGet(bitmap, 1) {
t.Fatalf("bitmap missing offset 0 or 1: %x", bitmap)
}
if !bytes.Equal(values[0], basicData) {
t.Fatalf("value[0]: got %x, want %x", values[0], basicData)
}
if !bytes.Equal(values[1], codeHash) {
t.Fatalf("value[1]: got %x, want %x", values[1], codeHash)
}
// Point reads via extractStemOffset.
got, err := extractStemOffset(blob, 0)
if err != nil {
t.Fatalf("extract offset 0: %v", err)
}
if !bytes.Equal(got, basicData) {
t.Fatalf("extract 0: got %x, want %x", got, basicData)
}
got, err = extractStemOffset(blob, 1)
if err != nil {
t.Fatalf("extract offset 1: %v", err)
}
if !bytes.Equal(got, codeHash) {
t.Fatalf("extract 1: got %x, want %x", got, codeHash)
}
// An unset offset returns (nil, nil).
got, err = extractStemOffset(blob, 42)
if err != nil {
t.Fatalf("extract unset offset: %v", err)
}
if got != nil {
t.Fatalf("extract unset: got %x, want nil", got)
}
}
// TestStemBlobAllOffsets verifies that a fully-populated stem (all 256
// offsets) encodes and decodes correctly. This is the worst-case size.
//
// Every offset is tagged with its own index, so a value returned from the
// wrong slot shows up as a tag mismatch.
func TestStemBlobAllOffsets(t *testing.T) {
	b := newStemBuilder()
	for i := range stemBlobBitmapBits {
		b.set(byte(i), mkval(byte(i)))
	}
	blob := b.encode()
	expectedLen := stemBlobBitmapSize + stemBlobBitmapBits*stemBlobValueSize
	if len(blob) != expectedLen {
		t.Fatalf("blob length: got %d, want %d", len(blob), expectedLen)
	}
	bitmap, _, err := decodeStemBlob(blob)
	if err != nil {
		t.Fatalf("decode: %v", err)
	}
	if bitmapPopcount(bitmap) != stemBlobBitmapBits {
		t.Fatalf("popcount: got %d, want %d", bitmapPopcount(bitmap), stemBlobBitmapBits)
	}
	for i := range stemBlobBitmapBits {
		got, err := extractStemOffset(blob, byte(i))
		if err != nil {
			t.Fatalf("extract %d: %v", i, err)
		}
		// An absent offset comes back as (nil, nil); fail cleanly
		// instead of panicking on got[0].
		if len(got) == 0 {
			t.Fatalf("extract %d: offset missing from blob", i)
		}
		if got[0] != byte(i) {
			t.Fatalf("extract %d: tag 0x%02x, want 0x%02x", i, got[0], byte(i))
		}
	}
}
// TestStemBlobSparseHighOffsets verifies that non-contiguous offsets
// (typical for storage slots scattered across the stem) round-trip
// correctly.
//
// Populated offsets must return their own tagged value, and offsets lying
// between them must read back as absent.
func TestStemBlobSparseHighOffsets(t *testing.T) {
	b := newStemBuilder()
	offsets := []byte{3, 17, 64, 127, 128, 200, 255}
	for _, o := range offsets {
		b.set(o, mkval(o))
	}
	blob := b.encode()
	if len(blob) != stemBlobBitmapSize+len(offsets)*stemBlobValueSize {
		t.Fatalf("unexpected blob length: %d", len(blob))
	}
	// Extract each and verify, including some absent offsets in between.
	for _, o := range offsets {
		got, err := extractStemOffset(blob, o)
		if err != nil {
			t.Fatalf("extract %d: %v", o, err)
		}
		// An absent offset comes back as (nil, nil); fail cleanly
		// instead of panicking on got[0].
		if len(got) == 0 {
			t.Fatalf("extract %d: offset missing from blob", o)
		}
		if got[0] != o {
			t.Fatalf("extract %d: tag 0x%02x, want 0x%02x", o, got[0], o)
		}
	}
	// Spot-check absent offsets between populated ones.
	for _, o := range []byte{0, 1, 2, 4, 18, 63, 126, 129, 199, 254} {
		got, err := extractStemOffset(blob, o)
		if err != nil {
			t.Fatalf("extract absent %d: %v", o, err)
		}
		if got != nil {
			t.Fatalf("extract absent %d: got %x, want nil", o, got)
		}
	}
}
// TestStemBlobSetClearRoundtrip checks that populating an offset and then
// clearing it again leaves the builder empty, so that it encodes to nil
// exactly as an untouched builder would.
func TestStemBlobSetClearRoundtrip(t *testing.T) {
	builder := newStemBuilder()

	builder.set(5, mkval(0xCD))
	if builder.empty() {
		t.Fatal("should not be empty after set")
	}

	// Passing nil is the "clear this offset" form of set.
	builder.set(5, nil)
	if !builder.empty() {
		t.Fatal("should be empty after clearing the only entry")
	}

	encoded := builder.encode()
	if encoded != nil {
		t.Fatalf("encode after clear: got %x, want nil", encoded)
	}
}
// TestStemBlobLoadFromBlob exercises the read-modify-write path: an
// encoded blob is loaded into a fresh builder, edited (overwrite, clear,
// add) and re-encoded, and each offset is then checked in the result.
func TestStemBlobLoadFromBlob(t *testing.T) {
	// Seed blob with two populated offsets.
	seed := newStemBuilder()
	seed.set(0, mkval(0x11))
	seed.set(64, mkval(0x22))
	initial := seed.encode()

	// Rehydrate a second builder from the seed blob and edit it.
	editor := newStemBuilder()
	loadErr := editor.loadFromBlob(initial)
	if loadErr != nil {
		t.Fatalf("loadFromBlob: %v", loadErr)
	}
	editor.set(0, mkval(0x33))   // overwrite offset 0
	editor.set(64, nil)          // clear offset 64
	editor.set(128, mkval(0x44)) // add offset 128
	updated := editor.encode()

	// Offset 0 must now carry the overwritten value.
	val, err := extractStemOffset(updated, 0)
	if ok := err == nil && val != nil && val[0] == 0x33; !ok {
		t.Fatalf("offset 0 after update: got %x err=%v, want tag 0x33", val, err)
	}

	// Offset 64 was cleared and must read back as absent.
	val, err = extractStemOffset(updated, 64)
	switch {
	case err != nil:
		t.Fatalf("offset 64 after clear: %v", err)
	case val != nil:
		t.Fatalf("offset 64 after clear: got %x, want nil", val)
	}

	// Offset 128 was newly added and must carry its value.
	val, err = extractStemOffset(updated, 128)
	if ok := err == nil && val != nil && val[0] == 0x44; !ok {
		t.Fatalf("offset 128 after update: got %x err=%v, want tag 0x44", val, err)
	}
}
// TestStemBlobMergeHelper verifies mergeStemBlob: read existing, apply
// writes, produce new blob in one call. It checks that an overwrite and
// an addition are both visible in the result, and that clearing an offset
// that was never populated leaves that offset absent.
func TestStemBlobMergeHelper(t *testing.T) {
	// Start with a blob containing offset 0.
	b := newStemBuilder()
	b.set(0, mkval(0x01))
	initial := b.encode()

	// Merge: overwrite 0, add 1, clear a non-existent offset (no-op).
	result, err := mergeStemBlob(initial, []stemOffsetValue{
		{Offset: 0, Value: mkval(0x02)},
		{Offset: 1, Value: mkval(0x03)},
		{Offset: 100, Value: nil},
	})
	if err != nil {
		t.Fatalf("merge: %v", err)
	}

	// Both written offsets must be present with their new tags. Extraction
	// errors are reported rather than discarded, so a malformed merge
	// result is not misreported as a plain value mismatch.
	present := []struct {
		offset byte
		tag    byte
	}{
		{0, 0x02},
		{1, 0x03},
	}
	for _, c := range present {
		got, err := extractStemOffset(result, c.offset)
		if err != nil {
			t.Fatalf("merged offset %d: %v", c.offset, err)
		}
		if len(got) == 0 || got[0] != c.tag {
			t.Fatalf("merged offset %d: got %x, want tag 0x%02x", c.offset, got, c.tag)
		}
	}

	// The cleared-but-never-set offset must still read back as absent.
	got, err := extractStemOffset(result, 100)
	if err != nil {
		t.Fatalf("merged offset 100: %v", err)
	}
	if got != nil {
		t.Fatalf("merged offset 100: got %x, want nil", got)
	}
}
// TestStemBlobMergeToEmpty checks that a merge which clears every
// populated entry yields a nil blob (so the caller deletes the key).
func TestStemBlobMergeToEmpty(t *testing.T) {
	// Seed blob with two populated offsets.
	seed := newStemBuilder()
	seed.set(0, mkval(0x01))
	seed.set(5, mkval(0x02))
	initial := seed.encode()

	// Clear exactly the offsets that were populated above.
	clears := []stemOffsetValue{
		{Offset: 0, Value: nil},
		{Offset: 5, Value: nil},
	}
	merged, err := mergeStemBlob(initial, clears)
	switch {
	case err != nil:
		t.Fatalf("merge to empty: %v", err)
	case merged != nil:
		t.Fatalf("merge to empty: got %x, want nil", merged)
	}
}
// TestStemBlobTombstoneZeroBytes verifies that a 32-byte zero value is
// preserved as "present with zero value" — not confused with "absent".
// DeleteStorage uses this convention.
func TestStemBlobTombstoneZeroBytes(t *testing.T) {
b := newStemBuilder()
zeros := make([]byte, stemBlobValueSize)
b.set(64, zeros)
if b.empty() {
t.Fatal("zero-value entry should count as populated")
}
blob := b.encode()
got, err := extractStemOffset(blob, 64)
if err != nil {
t.Fatalf("extract tombstone: %v", err)
}
if !bytes.Equal(got, zeros) {
t.Fatalf("extract tombstone: got %x, want 32 zero bytes", got)
}
}
// TestStemBlobMalformedInput checks that decodeStemBlob rejects blobs
// whose length is wrong: one shorter than the bitmap itself, and one
// whose bitmap promises more values than the blob actually holds.
func TestStemBlobMalformedInput(t *testing.T) {
	// Shorter than bitmap.
	tooShort := make([]byte, 10)
	_, _, err := decodeStemBlob(tooShort)
	if err == nil {
		t.Fatal("expected error for too-short blob")
	}

	// Bitmap claims 2 entries but blob only has room for 1. The buffer
	// starts zeroed, so setting the leading byte to 0xC0 (bits 0 and 1)
	// produces the whole bitmap prefix.
	truncated := make([]byte, stemBlobBitmapSize+stemBlobValueSize)
	truncated[0] = 0xC0
	_, _, err = decodeStemBlob(truncated)
	if err == nil {
		t.Fatal("expected error for blob shorter than bitmap implies")
	}
}
// TestBitmapRank sanity-checks bitmapRank, the bit-to-index helper used
// by extractStemOffset for single-offset reads. The expectations below
// treat the rank of an offset as the number of set bits strictly before it.
func TestBitmapRank(t *testing.T) {
	// Build a bitmap with bits 0, 1, 5, 64 and 200 set. Bits are laid out
	// most-significant first within each byte.
	var bitmap [stemBlobBitmapSize]byte
	populated := []byte{0, 1, 5, 64, 200}
	for i := range populated {
		off := populated[i]
		bitmap[off>>3] |= 0x80 >> (off & 7)
	}

	type rankCase struct {
		offset byte
		want   int
	}
	table := []rankCase{
		// Set offsets: rank equals the position among the populated bits.
		{offset: 0, want: 0},
		{offset: 1, want: 1},
		{offset: 5, want: 2},
		{offset: 64, want: 3},
		{offset: 200, want: 4},
		// Unset offsets: rank is the count of set bits below the offset.
		{offset: 2, want: 2},   // bits 0 and 1
		{offset: 100, want: 4}, // bits 0, 1, 5, 64
		{offset: 255, want: 5}, // all five bits
	}
	for i := range table {
		tc := table[i]
		got := bitmapRank(bitmap, tc.offset)
		if got == tc.want {
			continue
		}
		t.Errorf("bitmapRank(%d) = %d, want %d", tc.offset, got, tc.want)
	}
}