diff --git a/core/state/database_hasher.go b/core/state/database_hasher.go index 7fcf3edea3..b3f056d4d3 100644 --- a/core/state/database_hasher.go +++ b/core/state/database_hasher.go @@ -33,9 +33,19 @@ type CodeMut struct { // - Account == nil: delete the account // - Code == nil: leave code unchanged // - Code != nil: apply the given code mutation +// - CodeSize: the account's CURRENT total code size, not just the bytes +// carried in Code. It is used by implementations that pack the code +// size into their on-trie account encoding (e.g. the binary trie +// BasicData leaf). Callers must always populate this field to the +// account's real code size, obtained via stateObject.CodeSize() or an +// equivalent source — even on balance/nonce-only updates where the +// code bytes themselves are not loaded. Leaving it at zero on a +// non-code-touching update silently corrupts on-trie state for any +// hasher that stores code size. type AccountMut struct { - Account *Account // Null for deletion - Code *CodeMut // Null for unchanged + Account *Account // Null for deletion + Code *CodeMut // Null for unchanged + CodeSize int // Current code length (must be set by the caller) } // Hashes encapsulates a trie root together with its original (pre-update) root. diff --git a/core/state/database_hasher_binary.go b/core/state/database_hasher_binary.go index 86f33cfc5c..89850b377f 100644 --- a/core/state/database_hasher_binary.go +++ b/core/state/database_hasher_binary.go @@ -153,22 +153,25 @@ func (h *binaryHasher) deleteAccount(addr common.Address) error { } // update writes the account specified by the address into the state. +// +// The account's code size is taken from AccountMut.CodeSize, which the +// caller (StateDB.IntermediateRoot) populates via stateObject.CodeSize(). +// Per EIP-7864 the code_size field is packed into the BasicData leaf +// (bytes 5-7) and is consensus-critical; BinaryTrie.UpdateAccount rewrites +// the entire BasicData blob on every call, so passing the wrong codeLen +// would silently overwrite the stored code_size. In particular, for +// balance/nonce-only updates the new code bytes (account.Code) are nil +// and len(obj.code) is 0, yet the account may still have a non-zero code +// size that must be preserved — the caller gets this right by consulting +// the stateObject, which falls back to a reader code-size lookup when +// the bytes are not loaded. func (h *binaryHasher) updateAccount(addr common.Address, account AccountMut) error { - // Determine code size: use the new code length if provided, - // otherwise fall back to the cached or trie-stored value. - // - // TODO(rjl493456442) the contract code length is not assigned - // if it's not modified, fix it. - codeLen := 0 - if account.Code != nil { - codeLen = len(account.Code.Code) - } data := &types.StateAccount{ Nonce: account.Account.Nonce, Balance: account.Account.Balance, CodeHash: account.Account.CodeHash, } - if err := h.trie.UpdateAccount(addr, data, codeLen); err != nil { + if err := h.trie.UpdateAccount(addr, data, account.CodeSize); err != nil { return err } // Write chunked code into the trie when dirty. diff --git a/core/state/statedb.go b/core/state/statedb.go index 716f262bf5..e86a554508 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -787,7 +787,18 @@ func (s *StateDB) IntermediateRoot(deleteEmptyObjects bool) common.Hash { continue } obj := s.stateObjects[addr] - mut := AccountMut{Account: &obj.data} + // CodeSize must be the account's CURRENT total code size, even for + // non-code-touching mutations. obj.CodeSize() returns len(obj.code) + // when the code is loaded, otherwise falls back to a code-size + // lookup via the reader. Hashers that pack code size into the + // on-trie account encoding (e.g. the binary trie BasicData leaf, + // per EIP-7864) rely on this value. Passing the default 0 here on + // a balance/nonce-only update would silently corrupt the BasicData + // leaf of every contract touched without a code write. + mut := AccountMut{ + Account: &obj.data, + CodeSize: obj.CodeSize(), + } if obj.dirtyCode { mut.Code = &CodeMut{Code: obj.code} diff --git a/core/state/statedb_test.go b/core/state/statedb_test.go index 8d56699919..262403cdcd 100644 --- a/core/state/statedb_test.go +++ b/core/state/statedb_test.go @@ -1266,3 +1266,85 @@ func TestStorageDirtiness(t *testing.T) { state.RevertToSnapshot(snap) checkDirty(common.Hash{0x1}, common.Hash{0x1}, true) } + +// TestVerkleCodeSizePreserved is a regression test for a latent bug in the +// binary-trie update path of binaryHasher: codeLen was derived from +// account.Code, which is only non-nil when the contract code itself was +// modified in the current block. For balance- or nonce-only changes, +// account.Code was nil and the hasher silently wrote codeLen=0 into the +// BasicData leaf, corrupting the EIP-7864-defined code_size field every +// time a contract's balance or nonce was touched without a code write. +// +// The fix plumbs the account's current total code size through +// AccountMut.CodeSize, which the caller populates via +// stateObject.CodeSize() at commit time. This value is authoritative +// whether or not the code bytes are currently loaded. +// +// This test verifies that the state root produced by "create contract, +// commit, reload, modify balance, commit" matches the state root produced +// by a single commit of the final state. Equality can only hold if the +// code size survives the balance-only commit. +func TestVerkleCodeSizePreserved(t *testing.T) { + newVerkleState := func(t *testing.T) (*StateDB, *triedb.Database) { + t.Helper() + disk := rawdb.NewMemoryDatabase() + tdb := triedb.NewDatabase(disk, triedb.VerkleDefaults) + sdb := NewDatabase(tdb, nil) + // A fresh verkle pathdb's disk layer is keyed by EmptyVerkleHash + // (all-zero hash), not EmptyRootHash. Using the wrong one fails + // with "triedb parent layer missing" at commit. + state, err := New(types.EmptyVerkleHash, sdb) + if err != nil { + t.Fatalf("failed to initialize state: %v", err) + } + return state, tdb + } + + var ( + addr = common.HexToAddress("0xdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef") + code = make([]byte, 1234) // non-trivial code length so codeSize matters + ) + for i := range code { + code[i] = byte(i) + } + + // Path A: create contract, commit, reload, modify only balance, commit. + // On the second commit obj.code is not loaded (dirtyCode=false), so + // the previous implementation computed codeLen=0 via len(obj.code). + // Triedb layers stay in memory (no tdb.Commit) so we can chain a + // second block on top of the first. + stateA, tdbA := newVerkleState(t) + sdbA := NewDatabase(tdbA, nil) + stateA.SetBalance(addr, uint256.NewInt(100), tracing.BalanceChangeUnspecified) + stateA.SetCode(addr, code, tracing.CodeChangeUnspecified) + rootA1, err := stateA.Commit(0, true, false) + if err != nil { + t.Fatalf("path A first commit: %v", err) + } + + stateA, err = New(rootA1, sdbA) + if err != nil { + t.Fatalf("path A reload: %v", err) + } + stateA.SetBalance(addr, uint256.NewInt(200), tracing.BalanceChangeUnspecified) + rootA2, err := stateA.Commit(1, true, false) + if err != nil { + t.Fatalf("path A second commit: %v", err) + } + + // Path B: construct the same final state in one shot (balance=200 + code). + // obj.code is loaded because SetCode was just called, so codeSize is + // always correct here — this is the "known-good" reference. + stateB, _ := newVerkleState(t) + stateB.SetBalance(addr, uint256.NewInt(200), tracing.BalanceChangeUnspecified) + stateB.SetCode(addr, code, tracing.CodeChangeUnspecified) + rootB, err := stateB.Commit(0, true, false) + if err != nil { + t.Fatalf("path B commit: %v", err) + } + + if rootA2 != rootB { + t.Fatalf("state root mismatch after balance-only update:\n path A (reload + balance): %x\n path B (fresh, same final state): %x\n regression: binaryHasher.updateAccount used len(account.Code.Code)=0 because code was not modified", + rootA2, rootB) + } +}