core/state: fix incorrect contract code state metrics (#33376)

## Description
This PR fixes incorrect contract code state metrics by ensuring
duplicate codes are not counted towards the reported results.

## Rationale
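The contract code metrics do not account for database deduplication:
contract code is stored keyed by its hash, so identical code that is
deployed more than once occupies storage only once but is currently
counted every time. The current implementation assumes that the results
are only **slightly inaccurate**, but this is not true, especially for
data collection efforts that started from the genesis block.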
This commit is contained in:
Ng Wei Han 2025-12-10 11:33:59 +08:00 committed by GitHub
parent e58c785424
commit 9a346873b8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 73 additions and 27 deletions

View file

@ -1609,15 +1609,22 @@ func (bc *BlockChain) writeBlockWithState(block *types.Block, receipts []*types.
if err := blockBatch.Write(); err != nil {
log.Crit("Failed to write block into disk", "err", err)
}
// Commit all cached state changes into underlying memory database.
root, stateUpdate, err := statedb.CommitWithUpdate(block.NumberU64(), bc.chainConfig.IsEIP158(block.Number()), bc.chainConfig.IsCancun(block.Number(), block.Time()))
var (
err error
root common.Hash
isEIP158 = bc.chainConfig.IsEIP158(block.Number())
isCancun = bc.chainConfig.IsCancun(block.Number(), block.Time())
)
if bc.stateSizer == nil {
root, err = statedb.Commit(block.NumberU64(), isEIP158, isCancun)
} else {
root, err = statedb.CommitAndTrack(block.NumberU64(), isEIP158, isCancun, bc.stateSizer)
}
if err != nil {
return err
}
// Emit the state update to the state sizestats if it's active
if bc.stateSizer != nil {
bc.stateSizer.Notify(stateUpdate)
}
// If node is running in path mode, skip explicit gc operation
// which is unnecessary in this mode.
if bc.triedb.Scheme() == rawdb.PathScheme {

View file

@ -40,6 +40,10 @@ import (
// ContractCodeReader defines the interface for accessing contract code.
type ContractCodeReader interface {
// Has returns the flag indicating whether the contract code with
// specified address and hash exists or not.
Has(addr common.Address, codeHash common.Hash) bool
// Code retrieves a particular contract's code.
//
// - Returns nil code along with nil error if the requested contract code
@ -170,6 +174,13 @@ func (r *cachingCodeReader) CodeSize(addr common.Address, codeHash common.Hash)
return len(code), nil
}
// Has reports whether contract code identified by the given address and
// code hash can be found, i.e. whether Code yields a non-empty blob for it.
func (r *cachingCodeReader) Has(addr common.Address, codeHash common.Hash) bool {
	// The lookup error is intentionally dropped: the boolean result cannot
	// carry it, so only the length of the returned blob decides the answer.
	if blob, _ := r.Code(addr, codeHash); len(blob) != 0 {
		return true
	}
	return false
}
// flatReader wraps a database state reader and is safe for concurrent access.
type flatReader struct {
reader database.StateReader

View file

@ -243,12 +243,14 @@ func calSizeStats(update *stateUpdate) (SizeStats, error) {
}
}
// Measure code changes. Note that the reported contract code size may be slightly
// inaccurate due to database deduplication (code is stored by its hash). However,
// this deviation is negligible and acceptable for measurement purposes.
codeExists := make(map[common.Hash]struct{})
for _, code := range update.codes {
if _, ok := codeExists[code.hash]; ok || code.exists {
continue
}
stats.ContractCodes += 1
stats.ContractCodeBytes += codeKeySize + int64(len(code.blob))
codeExists[code.hash] = struct{}{}
}
return stats, nil
}

View file

@ -58,7 +58,7 @@ func TestSizeTracker(t *testing.T) {
state.AddBalance(addr3, uint256.NewInt(3000), tracing.BalanceChangeUnspecified)
state.SetNonce(addr3, 3, tracing.NonceChangeUnspecified)
currentRoot, _, err := state.CommitWithUpdate(1, true, false)
currentRoot, err := state.Commit(1, true, false)
if err != nil {
t.Fatalf("Failed to commit initial state: %v", err)
}
@ -83,7 +83,7 @@ func TestSizeTracker(t *testing.T) {
if i%3 == 0 {
newState.SetCode(testAddr, []byte{byte(i), 0x60, 0x80, byte(i + 1), 0x52}, tracing.CodeChangeUnspecified)
}
root, _, err := newState.CommitWithUpdate(blockNum, true, false)
root, err := newState.Commit(blockNum, true, false)
if err != nil {
t.Fatalf("Failed to commit state at block %d: %v", blockNum, err)
}
@ -154,21 +154,22 @@ func TestSizeTracker(t *testing.T) {
if i%3 == 0 {
newState.SetCode(testAddr, []byte{byte(i), 0x60, 0x80, byte(i + 1), 0x52}, tracing.CodeChangeUnspecified)
}
root, update, err := newState.CommitWithUpdate(blockNum, true, false)
ret, err := newState.commitAndFlush(blockNum, true, false, true)
if err != nil {
t.Fatalf("Failed to commit state at block %d: %v", blockNum, err)
}
if err := tdb.Commit(root, false); err != nil {
tracker.Notify(ret)
if err := tdb.Commit(ret.root, false); err != nil {
t.Fatalf("Failed to commit trie at block %d: %v", blockNum, err)
}
diff, err := calSizeStats(update)
diff, err := calSizeStats(ret)
if err != nil {
t.Fatalf("Failed to calculate size stats for block %d: %v", blockNum, err)
}
trackedUpdates = append(trackedUpdates, diff)
tracker.Notify(update)
currentRoot = root
currentRoot = ret.root
}
finalRoot := rawdb.ReadSnapshotRoot(db)

View file

@ -1317,11 +1317,16 @@ func (s *StateDB) commit(deleteEmptyObjects bool, noStorageWiping bool, blockNum
// commitAndFlush is a wrapper of commit which also commits the state mutations
// to the configured data stores.
func (s *StateDB) commitAndFlush(block uint64, deleteEmptyObjects bool, noStorageWiping bool) (*stateUpdate, error) {
func (s *StateDB) commitAndFlush(block uint64, deleteEmptyObjects bool, noStorageWiping bool, dedupCode bool) (*stateUpdate, error) {
ret, err := s.commit(deleteEmptyObjects, noStorageWiping, block)
if err != nil {
return nil, err
}
if dedupCode {
ret.markCodeExistence(s.reader)
}
// Commit dirty contract code if any exists
if db := s.db.TrieDB().Disk(); db != nil && len(ret.codes) > 0 {
batch := db.NewBatch()
@ -1376,21 +1381,21 @@ func (s *StateDB) commitAndFlush(block uint64, deleteEmptyObjects bool, noStorag
// no empty accounts left that could be deleted by EIP-158, storage wiping
// should not occur.
func (s *StateDB) Commit(block uint64, deleteEmptyObjects bool, noStorageWiping bool) (common.Hash, error) {
ret, err := s.commitAndFlush(block, deleteEmptyObjects, noStorageWiping)
// The trailing false is commitAndFlush's dedupCode argument: code-existence
// marking is skipped on this path, and only the root of the resulting state
// update is handed back to the caller.
ret, err := s.commitAndFlush(block, deleteEmptyObjects, noStorageWiping, false)
if err != nil {
// On failure the zero hash is returned together with the error.
return common.Hash{}, err
}
return ret.root, nil
}
// CommitWithUpdate writes the state mutations and returns both the root hash and the state update.
// This is useful for tracking state changes at the blockchain level.
func (s *StateDB) CommitWithUpdate(block uint64, deleteEmptyObjects bool, noStorageWiping bool) (common.Hash, *stateUpdate, error) {
ret, err := s.commitAndFlush(block, deleteEmptyObjects, noStorageWiping)
// CommitAndTrack writes the state mutations and notifies the size tracker of the state changes.
func (s *StateDB) CommitAndTrack(block uint64, deleteEmptyObjects bool, noStorageWiping bool, sizer *SizeTracker) (common.Hash, error) {
ret, err := s.commitAndFlush(block, deleteEmptyObjects, noStorageWiping, true)
if err != nil {
return common.Hash{}, nil, err
return common.Hash{}, err
}
return ret.root, ret, nil
sizer.Notify(ret)
return ret.root, nil
}
// Prepare handles the preparatory steps for executing a state transition with.

View file

@ -228,7 +228,7 @@ func (test *stateTest) run() bool {
} else {
state.IntermediateRoot(true) // call intermediateRoot at the transaction boundary
}
ret, err := state.commitAndFlush(0, true, false) // call commit at the block boundary
ret, err := state.commitAndFlush(0, true, false, false) // call commit at the block boundary
if err != nil {
panic(err)
}

View file

@ -26,8 +26,9 @@ import (
// contractCode represents a contract code with associated metadata.
type contractCode struct {
hash common.Hash // hash is the cryptographic hash of the contract code.
blob []byte // blob is the binary representation of the contract code.
hash common.Hash // hash is the cryptographic hash of the contract code.
blob []byte // blob is the binary representation of the contract code.
exists bool // exists reports whether the code reader already had this code; it is set by markCodeExistence and stays false otherwise.
}
// accountDelete represents an operation for deleting an Ethereum account.
@ -190,3 +191,22 @@ func (sc *stateUpdate) stateSet() *triedb.StateSet {
RawStorageKey: sc.rawStorageKey,
}
}
// markCodeExistence determines, for each piece of contract code referenced
// in this state update, whether the given reader already has that code, and
// records the answer in the entry's exists flag.
//
// The reader is queried at most once per distinct code hash; entries sharing
// a hash reuse the first answer.
//
// Note: This operation is expensive and not needed during normal state transitions.
// It is only required when SizeTracker is enabled to produce accurate state
// statistics.
func (sc *stateUpdate) markCodeExistence(reader ContractCodeReader) {
	known := make(map[common.Hash]bool, len(sc.codes))
	for addr, code := range sc.codes {
		exists, ok := known[code.hash]
		if !ok {
			exists = reader.Has(addr, code.hash)
			known[code.hash] = exists
		}
		code.exists = exists

		// Store the entry back under its key. The range variable is a copy of
		// the map element, so if sc.codes holds contractCode values (element
		// type is declared outside this view) the flag would otherwise be
		// lost; if it holds pointers this is a harmless reassignment.
		// Updating an existing key while ranging over the map is permitted.
		sc.codes[addr] = code
	}
}