From 64d185616cced990558c7d885d027dc0afd2f1ee Mon Sep 17 00:00:00 2001 From: CPerezz Date: Tue, 7 Apr 2026 18:32:46 +0200 Subject: [PATCH] core/state: plumb CodeSize through AccountMut for binaryHasher MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit binaryHasher.updateAccount computed codeLen from len(account.Code.Code), which is only non-zero when the code itself was modified in the current block. For balance- or nonce-only updates account.Code is nil and the computed codeLen was 0, silently overwriting the code_size field packed into the bintrie BasicData leaf (EIP-7864 bytes 5-7) with zero every time a contract was touched without a code write. The TODO(rjl493456442) on updateAccount acknowledged this. Fix it by adding a CodeSize field to AccountMut and having the caller at StateDB.IntermediateRoot populate it via stateObject.CodeSize(), which returns len(obj.code) when the bytes are loaded, otherwise falls back to a code-size lookup via the reader. The binary hasher then passes account.CodeSize straight to BinaryTrie.UpdateAccount as its codeLen argument, and the TODO is removed. Rationale for placing CodeSize on AccountMut rather than Account: AccountMut already carries Code *CodeMut — the new bytecode, which is not a field of Account — because code is write-time data that is not persisted in the flat-state format (SlimAccountRLP). CodeSize has the identical lifecycle: it is not in SlimAccountRLP, it is not populated by any reader, and it is only consumed by the hasher at write time. Mirroring Code's placement keeps the read-side/write-side split honest (Account models the persisted flat-state record; AccountMut adds the code-related write-time parameters). If the bintrie flat-state format is later extended to carry code_size, CodeSize can be promoted onto Account at that time. merkleHasher is unaffected: StateTrie.UpdateAccount ignores its codeLen parameter, so the wrapTrie.UpdateAccount shim continues to pass 0 and no state-root divergence is introduced on the MPT path. Regression test TestVerkleCodeSizePreserved verifies that the state root produced by "create contract, commit, reload, modify balance, commit" matches the root of a single-step construction of the same final state. Before the fix the roots diverge: path A (reload + balance): 1a675599... path B (fresh, same state): de0cfb03... --- core/state/database_hasher.go | 14 ++++- core/state/database_hasher_binary.go | 23 ++++---- core/state/statedb.go | 13 ++++- core/state/statedb_test.go | 82 ++++++++++++++++++++++++++++ 4 files changed, 119 insertions(+), 13 deletions(-) diff --git a/core/state/database_hasher.go b/core/state/database_hasher.go index 7fcf3edea3..b3f056d4d3 100644 --- a/core/state/database_hasher.go +++ b/core/state/database_hasher.go @@ -33,9 +33,19 @@ type CodeMut struct { // - Account == nil: delete the account // - Code == nil: leave code unchanged // - Code != nil: apply the given code mutation +// - CodeSize: the account's CURRENT total code size, not just the bytes +// carried in Code. It is used by implementations that pack the code +// size into their on-trie account encoding (e.g. the binary trie +// BasicData leaf). Callers must always populate this field to the +// account's real code size, obtained via stateObject.CodeSize() or an +// equivalent source — even on balance/nonce-only updates where the +// code bytes themselves are not loaded. Leaving it at zero on a +// non-code-touching update silently corrupts on-trie state for any +// hasher that stores code size. type AccountMut struct { - Account *Account // Null for deletion - Code *CodeMut // Null for unchanged + Account *Account // Null for deletion + Code *CodeMut // Null for unchanged + CodeSize int // Current code length (must be set by the caller) } // Hashes encapsulates a trie root together with its original (pre-update) root. diff --git a/core/state/database_hasher_binary.go b/core/state/database_hasher_binary.go index 86f33cfc5c..89850b377f 100644 --- a/core/state/database_hasher_binary.go +++ b/core/state/database_hasher_binary.go @@ -153,22 +153,25 @@ func (h *binaryHasher) deleteAccount(addr common.Address) error { } // update writes the account specified by the address into the state. +// +// The account's code size is taken from AccountMut.CodeSize, which the +// caller (StateDB.IntermediateRoot) populates via stateObject.CodeSize(). +// Per EIP-7864 the code_size field is packed into the BasicData leaf +// (bytes 5-7) and is consensus-critical; BinaryTrie.UpdateAccount rewrites +// the entire BasicData blob on every call, so passing the wrong codeLen +// would silently overwrite the stored code_size. In particular, for +// balance/nonce-only updates the new code bytes (account.Code) are nil +// and len(obj.code) is 0, yet the account may still have a non-zero code +// size that must be preserved — the caller gets this right by consulting +// the stateObject, which falls back to a reader code-size lookup when +// the bytes are not loaded. func (h *binaryHasher) updateAccount(addr common.Address, account AccountMut) error { - // Determine code size: use the new code length if provided, - // otherwise fall back to the cached or trie-stored value. - // - // TODO(rjl493456442) the contract code length is not assigned - // if it's not modified, fix it. - codeLen := 0 - if account.Code != nil { - codeLen = len(account.Code.Code) - } data := &types.StateAccount{ Nonce: account.Account.Nonce, Balance: account.Account.Balance, CodeHash: account.Account.CodeHash, } - if err := h.trie.UpdateAccount(addr, data, codeLen); err != nil { + if err := h.trie.UpdateAccount(addr, data, account.CodeSize); err != nil { return err } // Write chunked code into the trie when dirty. diff --git a/core/state/statedb.go b/core/state/statedb.go index 716f262bf5..e86a554508 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -787,7 +787,18 @@ func (s *StateDB) IntermediateRoot(deleteEmptyObjects bool) common.Hash { continue } obj := s.stateObjects[addr] - mut := AccountMut{Account: &obj.data} + // CodeSize must be the account's CURRENT total code size, even for + // non-code-touching mutations. obj.CodeSize() returns len(obj.code) + // when the code is loaded, otherwise falls back to a code-size + // lookup via the reader. Hashers that pack code size into the + // on-trie account encoding (e.g. the binary trie BasicData leaf, + // per EIP-7864) rely on this value. Passing the default 0 here on + // a balance/nonce-only update would silently corrupt the BasicData + // leaf of every contract touched without a code write. + mut := AccountMut{ + Account: &obj.data, + CodeSize: obj.CodeSize(), + } if obj.dirtyCode { mut.Code = &CodeMut{Code: obj.code} diff --git a/core/state/statedb_test.go b/core/state/statedb_test.go index 8d56699919..262403cdcd 100644 --- a/core/state/statedb_test.go +++ b/core/state/statedb_test.go @@ -1266,3 +1266,85 @@ func TestStorageDirtiness(t *testing.T) { state.RevertToSnapshot(snap) checkDirty(common.Hash{0x1}, common.Hash{0x1}, true) } + +// TestVerkleCodeSizePreserved is a regression test for a latent bug in the +// binary-trie update path of binaryHasher: codeLen was derived from +// account.Code, which is only non-nil when the contract code itself was +// modified in the current block. For balance- or nonce-only changes, +// account.Code was nil and the hasher silently wrote codeLen=0 into the +// BasicData leaf, corrupting the EIP-7864-defined code_size field every +// time a contract's balance or nonce was touched without a code write. +// +// The fix plumbs the account's current total code size through +// AccountMut.CodeSize, which the caller populates via +// stateObject.CodeSize() at commit time. This value is authoritative +// whether or not the code bytes are currently loaded. +// +// This test verifies that the state root produced by "create contract, +// commit, reload, modify balance, commit" matches the state root produced +// by a single commit of the final state. Equality can only hold if the +// code size survives the balance-only commit. +func TestVerkleCodeSizePreserved(t *testing.T) { + newVerkleState := func(t *testing.T) (*StateDB, *triedb.Database) { + t.Helper() + disk := rawdb.NewMemoryDatabase() + tdb := triedb.NewDatabase(disk, triedb.VerkleDefaults) + sdb := NewDatabase(tdb, nil) + // A fresh verkle pathdb's disk layer is keyed by EmptyVerkleHash + // (all-zero hash), not EmptyRootHash. Using the wrong one fails + // with "triedb parent layer missing" at commit. + state, err := New(types.EmptyVerkleHash, sdb) + if err != nil { + t.Fatalf("failed to initialize state: %v", err) + } + return state, tdb + } + + var ( + addr = common.HexToAddress("0xdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef") + code = make([]byte, 1234) // non-trivial code length so codeSize matters + ) + for i := range code { + code[i] = byte(i) + } + + // Path A: create contract, commit, reload, modify only balance, commit. + // On the second commit obj.code is not loaded (dirtyCode=false), so + // the previous implementation computed codeLen=0 via len(obj.code). + // Triedb layers stay in memory (no tdb.Commit) so we can chain a + // second block on top of the first. + stateA, tdbA := newVerkleState(t) + sdbA := NewDatabase(tdbA, nil) + stateA.SetBalance(addr, uint256.NewInt(100), tracing.BalanceChangeUnspecified) + stateA.SetCode(addr, code, tracing.CodeChangeUnspecified) + rootA1, err := stateA.Commit(0, true, false) + if err != nil { + t.Fatalf("path A first commit: %v", err) + } + + stateA, err = New(rootA1, sdbA) + if err != nil { + t.Fatalf("path A reload: %v", err) + } + stateA.SetBalance(addr, uint256.NewInt(200), tracing.BalanceChangeUnspecified) + rootA2, err := stateA.Commit(1, true, false) + if err != nil { + t.Fatalf("path A second commit: %v", err) + } + + // Path B: construct the same final state in one shot (balance=200 + code). + // obj.code is loaded because SetCode was just called, so codeSize is + // always correct here — this is the "known-good" reference. + stateB, _ := newVerkleState(t) + stateB.SetBalance(addr, uint256.NewInt(200), tracing.BalanceChangeUnspecified) + stateB.SetCode(addr, code, tracing.CodeChangeUnspecified) + rootB, err := stateB.Commit(0, true, false) + if err != nil { + t.Fatalf("path B commit: %v", err) + } + + if rootA2 != rootB { + t.Fatalf("state root mismatch after balance-only update:\n path A (reload + balance): %x\n path B (fresh, same final state): %x\n regression: binaryHasher.updateAccount used len(account.Code.Code)=0 because code was not modified", + rootA2, rootB) + } +}