// Copyright 2024 The go-ethereum Authors // This file is part of the go-ethereum library. // // The go-ethereum library is free software: you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // The go-ethereum library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public License // along with the go-ethereum library. If not, see . package state import ( "errors" "fmt" "sync" "sync/atomic" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/overlay" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" "github.com/ethereum/go-ethereum/trie/bintrie" "github.com/ethereum/go-ethereum/trie/transitiontrie" "github.com/ethereum/go-ethereum/triedb" "github.com/ethereum/go-ethereum/triedb/database" "github.com/holiman/uint256" ) // ContractCodeReader defines the interface for accessing contract code. // // ContractCodeReader is supposed to be thread-safe. type ContractCodeReader interface { // Has returns the flag indicating whether the contract code with // specified address and hash exists or not. Has(addr common.Address, codeHash common.Hash) bool // Code retrieves a particular contract's code. Returns nil code if the // requested contract code doesn't exist. Code(addr common.Address, codeHash common.Hash) []byte // CodeSize retrieves a particular contracts code's size. Returns zero code // size if the requested contract code doesn't exist. CodeSize(addr common.Address, codeHash common.Hash) int } // Account represents the metadata of an Ethereum account object. // Unlike the representation in the Merkle-Patricia Trie, the storage root // is omitted. This structure is designed to provide a unified view over // flat state representations and remain compatible with different hashing // schemes (e.g., a unified binary tree in the future). type Account struct { Nonce uint64 Balance *uint256.Int CodeHash []byte } // newEmptyAccount returns an empty account. func newEmptyAccount() *Account { return &Account{ Balance: uint256.NewInt(0), CodeHash: types.EmptyCodeHash.Bytes(), } } // copy returns a deep-copied account object. func (acct *Account) copy() *Account { var balance *uint256.Int if acct.Balance != nil { balance = new(uint256.Int).Set(acct.Balance) } return &Account{ Nonce: acct.Nonce, Balance: balance, CodeHash: common.CopyBytes(acct.CodeHash), } } // StateReader defines the interface for accessing accounts and storage slots // associated with a specific state. // // StateReader is supposed to be thread-safe. type StateReader interface { // Account retrieves the account associated with a particular address. // // - Returns a nil account if it does not exist // - Returns an error only if an unexpected issue occurs // - The returned account is safe to modify after the call Account(addr common.Address) (*Account, error) // Storage retrieves the storage slot associated with a particular account // address and slot key. // // - Returns an empty slot if it does not exist // - Returns an error only if an unexpected issue occurs // - The returned storage slot is safe to modify after the call Storage(addr common.Address, slot common.Hash) (common.Hash, error) } // Reader defines the interface for accessing accounts, storage slots and contract // code associated with a specific state. // // Reader is assumed to be thread-safe and implementation must take care of the // concurrency issue by themselves. type Reader interface { ContractCodeReader StateReader } // flatReader wraps a database state reader and is safe for concurrent access. type flatReader struct { reader database.StateReader } // newFlatReader constructs a state reader with on the given state root. func newFlatReader(reader database.StateReader) *flatReader { return &flatReader{reader: reader} } // Account implements StateReader, retrieving the account specified by the address. // // An error will be returned if the associated snapshot is already stale or // the requested account is not yet covered by the snapshot. // // The returned account might be nil if it's not existent. func (r *flatReader) Account(addr common.Address) (*Account, error) { account, err := r.reader.Account(crypto.Keccak256Hash(addr[:])) if err != nil { return nil, err } if account == nil { return nil, nil } acct := &Account{ Nonce: account.Nonce, Balance: account.Balance, CodeHash: account.CodeHash, } // Account objects resolved from the flat state always omit the // empty code hash. if len(acct.CodeHash) == 0 { acct.CodeHash = types.EmptyCodeHash.Bytes() } return acct, nil } // Storage implements StateReader, retrieving the storage slot specified by the // address and slot key. // // An error will be returned if the associated snapshot is already stale or // the requested storage slot is not yet covered by the snapshot. // // The returned storage slot might be empty if it's not existent. func (r *flatReader) Storage(addr common.Address, key common.Hash) (common.Hash, error) { addrHash := crypto.Keccak256Hash(addr[:]) slotHash := crypto.Keccak256Hash(key[:]) ret, err := r.reader.Storage(addrHash, slotHash) if err != nil { return common.Hash{}, err } if len(ret) == 0 { return common.Hash{}, nil } // Perform the rlp-decode as the slot value is RLP-encoded in the state // snapshot. _, content, _, err := rlp.Split(ret) if err != nil { return common.Hash{}, err } var value common.Hash value.SetBytes(content) return value, nil } // bintrieFlatReader is the binary-trie analogue of flatReader. It exposes // the StateReader interface backed by the path database's per-stem flat // state, doing the EIP-7864 key derivation locally so the underlying // pathdb reader only sees raw 32-byte (stem || offset) lookup keys. // // Each Account call performs TWO underlying lookups (BasicData at offset // 0 and CodeHash at offset 1), because the diff layers store one entry // per offset rather than a pre-aggregated stem blob — this lets two // different blocks touch the same account at different offsets without // stomping on each other. Storage calls perform a single lookup at the // slot's full bintrie key. // // The reader holds a pathdb.RawStateReader (a small extension of // database.StateReader that exposes AccountRLP for raw-byte access) // because reader.Account() in pathdb decodes its result as slim RLP, // which is the wrong format for bintrie leaves. AccountRLP returns the // raw 32-byte leaf value untouched. type bintrieFlatReader struct { reader pathdbRawStateReader } // pathdbRawStateReader is the local view of pathdb.RawStateReader. It is // duplicated here (rather than imported) to avoid pulling pathdb into // every consumer of state.StateReader; the runtime type-assertion in // CachingDB.StateReader satisfies the interface dynamically. type pathdbRawStateReader interface { database.StateReader AccountRLP(hash common.Hash) ([]byte, error) } // newBintrieFlatReader constructs a state reader backed by the bintrie // codec. It returns nil if the underlying database.StateReader is not // raw-byte capable (which would be the case for any merkle path-database // reader); callers should fall through to the trie reader in that case. func newBintrieFlatReader(reader database.StateReader) *bintrieFlatReader { raw, ok := reader.(pathdbRawStateReader) if !ok { return nil } return &bintrieFlatReader{reader: raw} } // Account implements StateReader. It performs two underlying reads — // one for the BasicData leaf (offset 0) and one for the CodeHash leaf // (offset 1) — and combines them into a unified Account. // // Torn-read invariant (load-bearing): binaryHasher.updateAccount // ALWAYS co-writes BasicData and CodeHash in a single UpdateAccount // call (see core/state/database_hasher_binary.go:updateAccount). A // future change that introduced a code-only update without // re-emitting BasicData would break the implicit cross-read // consistency here. TestBinaryHasherWritesBothBasicAndCodeHash locks // this invariant down. // // Return value contract: // - both leaves 32 bytes → decoded Account, nil error. // - either leaf invalid length → corruption error, surfaced as-is. // - both leaves absent → (nil, nil): authoritative non-membership. // Uncovered keys already fail with errNotCoveredYet at the pathdb layer. func (r *bintrieFlatReader) Account(addr common.Address) (*Account, error) { basicKey := common.BytesToHash(bintrie.GetBinaryTreeKeyBasicData(addr)) codeKey := common.BytesToHash(bintrie.GetBinaryTreeKeyCodeHash(addr)) basicBlob, err := r.reader.AccountRLP(basicKey) if err != nil { return nil, fmt.Errorf("bintrie BasicData read %x: %w", addr, err) } codeBlob, err := r.reader.AccountRLP(codeKey) if err != nil { return nil, fmt.Errorf("bintrie CodeHash read %x: %w", addr, err) } if len(basicBlob) == 0 && len(codeBlob) == 0 { return nil, nil // Authoritative absence: pathdb confirmed key is covered } // A bintrie leaf is always either absent or exactly 32 bytes. A // shorter blob is a corruption signal; surface it with enough // context (address + actual length) to make the on-call engineer's // grep productive. if len(basicBlob) != 0 && len(basicBlob) != 32 { return nil, fmt.Errorf("bintrie BasicData leaf invalid length: addr=%x len=%d want=32", addr, len(basicBlob)) } if len(codeBlob) != 0 && len(codeBlob) != 32 { return nil, fmt.Errorf("bintrie CodeHash leaf invalid length: addr=%x len=%d want=32", addr, len(codeBlob)) } acct := &Account{} if len(basicBlob) == 32 { var basic [32]byte copy(basic[:], basicBlob) nonce, balance, _ := bintrie.UnpackBasicData(basic) acct.Nonce = nonce acct.Balance = balance } else { // CodeHash present but BasicData absent: treat as a freshly // created account whose body has not been written yet. The // merkle path returns the empty-balance form in this case too. acct.Balance = uint256.NewInt(0) } if len(codeBlob) == 32 { acct.CodeHash = common.CopyBytes(codeBlob) } else { acct.CodeHash = types.EmptyCodeHash.Bytes() } return acct, nil } // Storage implements StateReader. The caller's (addr, slot) pair is // turned into a single 32-byte (stem || offset) bintrie key via // GetBinaryTreeKeyStorageSlot, and we look it up via AccountRLP because // the diff layer stores all bintrie leaves under accountData regardless // of whether they came from an account header or a storage write. // // Return value contract: // - 32-byte leaf found → decode as common.Hash and return. // - invalid-length leaf → corruption error. // - no leaf → (common.Hash{}, nil): authoritative non-membership. // A slot explicitly set to zero is NOT absent — the bintrie // tombstone convention writes 32 zero bytes (a present leaf). func (r *bintrieFlatReader) Storage(addr common.Address, slot common.Hash) (common.Hash, error) { fullKey := bintrie.GetBinaryTreeKeyStorageSlot(addr, slot[:]) blob, err := r.reader.AccountRLP(common.BytesToHash(fullKey)) if err != nil { return common.Hash{}, fmt.Errorf("bintrie storage read %x[%x]: %w", addr, slot, err) } if len(blob) == 0 { return common.Hash{}, nil // Authoritative absence: pathdb confirmed key is covered } if len(blob) != 32 { return common.Hash{}, fmt.Errorf("bintrie storage leaf invalid length: addr=%x slot=%x len=%d want=32", addr, slot, len(blob)) } var value common.Hash copy(value[:], blob) return value, nil } // trieReader implements the StateReader interface, providing functions to access // state from the referenced trie. // // trieReader is safe for concurrent read. type trieReader struct { root common.Hash // State root which uniquely represent a state db *triedb.Database // Database for loading trie // Main trie, resolved in constructor. Note either the Merkle-Patricia-tree // or Verkle-tree is not safe for concurrent read. mainTrie Trie subRoots map[common.Address]common.Hash // Set of storage roots, cached when the account is resolved subTries map[common.Address]Trie // Group of storage tries, cached when it's resolved lock sync.Mutex // Lock for protecting concurrent read } // newTrieReader constructs a trie reader of the specific state. An error will be // returned if the associated trie specified by root is not existent. func newTrieReader(root common.Hash, db *triedb.Database) (*trieReader, error) { var ( tr Trie err error ) if !db.IsVerkle() { tr, err = trie.NewStateTrie(trie.StateTrieID(root), db) } else { // When IsVerkle() is true, create a BinaryTrie wrapped in TransitionTrie binTrie, binErr := bintrie.NewBinaryTrie(root, db) if binErr != nil { return nil, binErr } // Based on the transition status, determine if the overlay // tree needs to be created, or if a single, target tree is // to be picked. ts := overlay.LoadTransitionState(db.Disk(), root, true) if ts.InTransition() { mpt, err := trie.NewStateTrie(trie.StateTrieID(ts.BaseRoot), db) if err != nil { return nil, err } tr = transitiontrie.NewTransitionTrie(mpt, binTrie, false) } else { // HACK: Use TransitionTrie with nil base as a wrapper to make BinaryTrie // satisfy the Trie interface. This works around the import cycle between // trie and trie/bintrie packages. // // TODO: In future PRs, refactor the package structure to avoid this hack: // - Option 1: Move common interfaces (Trie, NodeIterator) to a separate // package that both trie and trie/bintrie can import // - Option 2: Create a factory function in the trie package that returns // BinaryTrie as a Trie interface without direct import // - Option 3: Move BinaryTrie to the main trie package // // The current approach works but adds unnecessary overhead and complexity // by using TransitionTrie when there's no actual transition happening. tr = transitiontrie.NewTransitionTrie(nil, binTrie, false) } } if err != nil { return nil, err } return &trieReader{ root: root, db: db, mainTrie: tr, subRoots: make(map[common.Address]common.Hash), subTries: make(map[common.Address]Trie), }, nil } // account is the inner version of Account and assumes the r.lock is already held. func (r *trieReader) account(addr common.Address) (*Account, error) { account, err := r.mainTrie.GetAccount(addr) if err != nil { return nil, err } if account == nil { r.subRoots[addr] = types.EmptyRootHash return nil, nil } else { r.subRoots[addr] = account.Root // Account objects resolved from the trie always include // the full code hash. return &Account{ Nonce: account.Nonce, Balance: account.Balance, CodeHash: account.CodeHash, }, nil } } // Account implements StateReader, retrieving the account specified by the address. // // An error will be returned if the trie state is corrupted. An nil account // will be returned if it's not existent in the trie. func (r *trieReader) Account(addr common.Address) (*Account, error) { r.lock.Lock() defer r.lock.Unlock() return r.account(addr) } // Storage implements StateReader, retrieving the storage slot specified by the // address and slot key. // // An error will be returned if the trie state is corrupted. An empty storage // slot will be returned if it's not existent in the trie. func (r *trieReader) Storage(addr common.Address, key common.Hash) (common.Hash, error) { r.lock.Lock() defer r.lock.Unlock() var ( tr Trie found bool value common.Hash ) if r.db.IsVerkle() { tr = r.mainTrie } else { tr, found = r.subTries[addr] if !found { root, ok := r.subRoots[addr] // The storage slot is accessed without account caching. It's unexpected // behavior but try to resolve the account first anyway. if !ok { _, err := r.account(addr) if err != nil { return common.Hash{}, err } root = r.subRoots[addr] } var err error tr, err = trie.NewStateTrie(trie.StorageTrieID(r.root, crypto.Keccak256Hash(addr.Bytes()), root), r.db) if err != nil { return common.Hash{}, err } r.subTries[addr] = tr } } ret, err := tr.GetStorage(addr, key.Bytes()) if err != nil { return common.Hash{}, err } value.SetBytes(ret) return value, nil } // multiStateReader is the aggregation of a list of StateReader interface, // providing state access by leveraging all readers. The checking priority // is determined by the position in the reader list. // // multiStateReader is safe for concurrent read and assumes all underlying // readers are thread-safe as well. type multiStateReader struct { readers []StateReader // List of state readers, sorted by checking priority } // newMultiStateReader constructs a multiStateReader instance with the given // readers. The priority among readers is assumed to be sorted. Note, it must // contain at least one reader for constructing a multiStateReader. func newMultiStateReader(readers ...StateReader) (*multiStateReader, error) { if len(readers) == 0 { return nil, errors.New("empty reader set") } return &multiStateReader{ readers: readers, }, nil } // Account implementing StateReader interface, retrieving the account associated // with a particular address. // // - Returns a nil account if it does not exist // - Returns an error only if an unexpected issue occurs // - The returned account is safe to modify after the call func (r *multiStateReader) Account(addr common.Address) (*Account, error) { var errs []error for _, reader := range r.readers { acct, err := reader.Account(addr) if err == nil { return acct, nil } errs = append(errs, err) } return nil, errors.Join(errs...) } // Storage implementing StateReader interface, retrieving the storage slot // associated with a particular account address and slot key. // // - Returns an empty slot if it does not exist // - Returns an error only if an unexpected issue occurs // - The returned storage slot is safe to modify after the call func (r *multiStateReader) Storage(addr common.Address, slot common.Hash) (common.Hash, error) { var errs []error for _, reader := range r.readers { slot, err := reader.Storage(addr, slot) if err == nil { return slot, nil } errs = append(errs, err) } return common.Hash{}, errors.Join(errs...) } // stateReaderWithCache is a wrapper around StateReader that maintains additional // state caches to support concurrent state access. type stateReaderWithCache struct { StateReader // Previously resolved state entries. accounts map[common.Address]*Account accountLock sync.RWMutex // List of storage buckets, each of which is thread-safe. // This reader is typically used in scenarios requiring concurrent // access to storage. Using multiple buckets helps mitigate // the overhead caused by locking. storageBuckets [16]struct { lock sync.RWMutex storages map[common.Address]map[common.Hash]common.Hash } } // newStateReaderWithCache constructs the state reader with local cache. func newStateReaderWithCache(sr StateReader) *stateReaderWithCache { r := &stateReaderWithCache{ StateReader: sr, accounts: make(map[common.Address]*Account), } for i := range r.storageBuckets { r.storageBuckets[i].storages = make(map[common.Address]map[common.Hash]common.Hash) } return r } // account retrieves the account specified by the address along with a flag // indicating whether it's found in the cache or not. The returned account // might be nil if it's not existent. // // An error will be returned if the state is corrupted in the underlying reader. func (r *stateReaderWithCache) account(addr common.Address) (*Account, bool, error) { // Try to resolve the requested account in the local cache r.accountLock.RLock() acct, ok := r.accounts[addr] r.accountLock.RUnlock() if ok { return acct, true, nil } // Try to resolve the requested account from the underlying reader acct, err := r.StateReader.Account(addr) if err != nil { return nil, false, err } r.accountLock.Lock() r.accounts[addr] = acct r.accountLock.Unlock() return acct, false, nil } // Account implements StateReader, retrieving the account specified by the address. // The returned account might be nil if it's not existent. // // An error will be returned if the state is corrupted in the underlying reader. func (r *stateReaderWithCache) Account(addr common.Address) (*Account, error) { account, _, err := r.account(addr) return account, err } // storage retrieves the storage slot specified by the address and slot key, along // with a flag indicating whether it's found in the cache or not. The returned // storage slot might be empty if it's not existent. func (r *stateReaderWithCache) storage(addr common.Address, slot common.Hash) (common.Hash, bool, error) { var ( value common.Hash ok bool bucket = &r.storageBuckets[addr[0]&0x0f] ) // Try to resolve the requested storage slot in the local cache bucket.lock.RLock() slots, ok := bucket.storages[addr] if ok { value, ok = slots[slot] } bucket.lock.RUnlock() if ok { return value, true, nil } // Try to resolve the requested storage slot from the underlying reader value, err := r.StateReader.Storage(addr, slot) if err != nil { return common.Hash{}, false, err } bucket.lock.Lock() slots, ok = bucket.storages[addr] if !ok { slots = make(map[common.Hash]common.Hash) bucket.storages[addr] = slots } slots[slot] = value bucket.lock.Unlock() return value, false, nil } // Storage implements StateReader, retrieving the storage slot specified by the // address and slot key. The returned storage slot might be empty if it's not // existent. // // An error will be returned if the state is corrupted in the underlying reader. func (r *stateReaderWithCache) Storage(addr common.Address, slot common.Hash) (common.Hash, error) { value, _, err := r.storage(addr, slot) return value, err } // stateReaderWithStats is a wrapper over the stateReaderWithCache, tracking // the cache hit statistics of the reader. type stateReaderWithStats struct { *stateReaderWithCache accountCacheHit atomic.Int64 accountCacheMiss atomic.Int64 storageCacheHit atomic.Int64 storageCacheMiss atomic.Int64 } // newReaderWithStats constructs the state reader with additional statistics tracked. func newStateReaderWithStats(sr *stateReaderWithCache) *stateReaderWithStats { return &stateReaderWithStats{ stateReaderWithCache: sr, } } // Account implements StateReader, retrieving the account specified by the address. // The returned account might be nil if it's not existent. // // An error will be returned if the state is corrupted in the underlying reader. func (r *stateReaderWithStats) Account(addr common.Address) (*Account, error) { account, incache, err := r.stateReaderWithCache.account(addr) if err != nil { return nil, err } if incache { r.accountCacheHit.Add(1) } else { r.accountCacheMiss.Add(1) } return account, nil } // Storage implements StateReader, retrieving the storage slot specified by the // address and slot key. The returned storage slot might be empty if it's not // existent. // // An error will be returned if the state is corrupted in the underlying reader. func (r *stateReaderWithStats) Storage(addr common.Address, slot common.Hash) (common.Hash, error) { value, incache, err := r.stateReaderWithCache.storage(addr, slot) if err != nil { return common.Hash{}, err } if incache { r.storageCacheHit.Add(1) } else { r.storageCacheMiss.Add(1) } return value, nil } // GetStateStats implements StateReaderStater, returning the statistics of the // state reader. func (r *stateReaderWithStats) GetStateStats() StateReaderStats { return StateReaderStats{ AccountCacheHit: r.accountCacheHit.Load(), AccountCacheMiss: r.accountCacheMiss.Load(), StorageCacheHit: r.storageCacheHit.Load(), StorageCacheMiss: r.storageCacheMiss.Load(), } } // reader aggregates a code reader and a state reader into a single object. type reader struct { ContractCodeReader StateReader } // newReader constructs a reader with the supplied code reader and state reader. func newReader(codeReader ContractCodeReader, stateReader StateReader) *reader { return &reader{ ContractCodeReader: codeReader, StateReader: stateReader, } } // GetCodeStats returns the statistics of code access. func (r *reader) GetCodeStats() ContractCodeReaderStats { if stater, ok := r.ContractCodeReader.(ContractCodeReaderStater); ok { return stater.GetCodeStats() } return ContractCodeReaderStats{} } // GetStateStats returns the statistics of state access. func (r *reader) GetStateStats() StateReaderStats { if stater, ok := r.StateReader.(StateReaderStater); ok { return stater.GetStateStats() } return StateReaderStats{} } // GetStats returns the aggregated statistics for both state and code access. func (r *reader) GetStats() ReaderStats { return ReaderStats{ CodeStats: r.GetCodeStats(), StateStats: r.GetStateStats(), } }