From ff772bfa085993057e3b12304ea8d290cbc40140 Mon Sep 17 00:00:00 2001
From: Stefan <22667037+qu0b@users.noreply.github.com>
Date: Tue, 17 Mar 2026 14:08:06 +0100
Subject: [PATCH] bal: lazy scope allocation in access list builder (#34019)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary

- Avoid allocating a `map[common.Address]*constructionAccountAccess` on every `enterScope()` call in the BAL access list builder
- Push `nil` onto the stack and lazily allocate via `currentScope()` only when the scope first records a state access (read or write)
- `exitScope()` short-circuits when the child scope is nil (no merge needed)
- Adds correctness test (`TestLazyScopeCorrectness`) and benchmark (`BenchmarkPrecompileScopes`)

## Context

The [execution-specs benchmark tests](https://github.com/ethereum/execution-specs/blob/tests-benchmark@v0.0.7/tests/benchmark/compute/precompile/test_alt_bn128.py) for `bn128_add` (line 19) and `bn128_pairing` (line 496) create blocks with transactions that loop `STATICCALL` to precompiles thousands of times. Each STATICCALL triggers `EnterScope()`/`ExitScope()` in the BAL tracer, but precompile calls produce empty scopes since they don't touch state (`AddBalance(0)` is filtered by `!amount.IsZero()`).

The previous implementation eagerly allocated a map on every `enterScope()`, creating ~200K unnecessary heap allocations per transaction for `bn128_add` (150 gas, ~200K calls per 30M gas block).

**Note:** For benchmark-only workloads that don't need BAL validation, `--bal.executionmode=sequential` can be used to skip the parallel processor entirely and avoid all tracing overhead.

## Benchmark results

Precompile calls with no state changes per scope:

| Calls | Before (ns/op) | After (ns/op) | Speedup | Allocs Before → After |
|---|---|---|---|---|
| 100 | 5,073 | 1,499 | **3.4x** | 119 → 19 |
| 1,000 | 47,392 | 3,199 | **14.8x** | 1,019 → 19 |
| 10,000 | 493,651 | 20,644 | **23.9x** | 10,019 → 19 |
| 100,000 | 4,526,165 | 192,502 | **23.5x** | 100,019 → 19 |

## Test plan

- [x] `TestLazyScopeCorrectness` — mixed workload: precompile scopes + state-changing scopes + reverted scopes
- [x] `BenchmarkPrecompileScopes` — measures scope tracking overhead for 100 to 100K empty scopes
- [x] All `core/vm/` tests pass
- [x] All `core/types/bal/` tests pass, except for pre-existing failures in `TestBALEncoding` and `TestBlockAccessListValidation` that are unrelated to this change

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.6 (1M context)
---
 core/types/bal/bal.go      |  83 +++++++++++++++++----------
 core/types/bal/bal_test.go | 113 +++++++++++++++++++++++++++++++++++++
 2 files changed, 167 insertions(+), 29 deletions(-)

diff --git a/core/types/bal/bal.go b/core/types/bal/bal.go index 35b31194cf..752465c6e2 100644 --- a/core/types/bal/bal.go +++ b/core/types/bal/bal.go @@ -50,17 +50,30 @@ func newAccessListBuilder() *idxAccessListBuilder { } } -func (c *idxAccessListBuilder) storageRead(address common.Address, key common.Hash) { - if _, ok := c.accessesStack[len(c.accessesStack)-1][address]; !ok { - c.accessesStack[len(c.accessesStack)-1][address] = &constructionAccountAccess{} +// currentScope returns the current (topmost) scope, lazily allocating if needed. +// This avoids allocating a map for scopes that never record any state changes, +// which is critical for precompile-heavy blocks where STATICCALL creates/destroys +// thousands of empty scopes per transaction. 
+func (c *idxAccessListBuilder) currentScope() map[common.Address]*constructionAccountAccess { + top := len(c.accessesStack) - 1 + if c.accessesStack[top] == nil { + c.accessesStack[top] = make(map[common.Address]*constructionAccountAccess) } - acctAccesses := c.accessesStack[len(c.accessesStack)-1][address] - acctAccesses.StorageRead(key) + return c.accessesStack[top] +} + +func (c *idxAccessListBuilder) storageRead(address common.Address, key common.Hash) { + scope := c.currentScope() + if _, ok := scope[address]; !ok { + scope[address] = &constructionAccountAccess{} + } + scope[address].StorageRead(key) } func (c *idxAccessListBuilder) accountRead(address common.Address) { - if _, ok := c.accessesStack[len(c.accessesStack)-1][address]; !ok { - c.accessesStack[len(c.accessesStack)-1][address] = &constructionAccountAccess{} + scope := c.currentScope() + if _, ok := scope[address]; !ok { + scope[address] = &constructionAccountAccess{} } } @@ -75,11 +88,11 @@ func (c *idxAccessListBuilder) storageWrite(address common.Address, key, prevVal c.prestates[address].storage[key] = prevVal } - if _, ok := c.accessesStack[len(c.accessesStack)-1][address]; !ok { - c.accessesStack[len(c.accessesStack)-1][address] = &constructionAccountAccess{} + scope := c.currentScope() + if _, ok := scope[address]; !ok { + scope[address] = &constructionAccountAccess{} } - acctAccesses := c.accessesStack[len(c.accessesStack)-1][address] - acctAccesses.StorageWrite(key, prevVal, newVal) + scope[address].StorageWrite(key, prevVal, newVal) } func (c *idxAccessListBuilder) balanceChange(address common.Address, prev, cur *uint256.Int) { @@ -89,11 +102,11 @@ func (c *idxAccessListBuilder) balanceChange(address common.Address, prev, cur * if c.prestates[address].balance == nil { c.prestates[address].balance = prev } - if _, ok := c.accessesStack[len(c.accessesStack)-1][address]; !ok { - c.accessesStack[len(c.accessesStack)-1][address] = &constructionAccountAccess{} + scope := c.currentScope() + if 
_, ok := scope[address]; !ok { + scope[address] = &constructionAccountAccess{} } - acctAccesses := c.accessesStack[len(c.accessesStack)-1][address] - acctAccesses.BalanceChange(cur) + scope[address].BalanceChange(cur) } func (c *idxAccessListBuilder) codeChange(address common.Address, prev, cur []byte) { @@ -113,12 +126,11 @@ func (c *idxAccessListBuilder) codeChange(address common.Address, prev, cur []by } c.prestates[address].code = prev } - if _, ok := c.accessesStack[len(c.accessesStack)-1][address]; !ok { - c.accessesStack[len(c.accessesStack)-1][address] = &constructionAccountAccess{} + scope := c.currentScope() + if _, ok := scope[address]; !ok { + scope[address] = &constructionAccountAccess{} } - acctAccesses := c.accessesStack[len(c.accessesStack)-1][address] - - acctAccesses.CodeChange(cur) + scope[address].CodeChange(cur) } // selfDestruct is invoked when an account which has been created and invoked @@ -127,7 +139,8 @@ func (c *idxAccessListBuilder) codeChange(address common.Address, prev, cur []by // Any storage accesses/modifications performed at the contract during execution // of the current call are retained in the block access list as state reads. 
func (c *idxAccessListBuilder) selfDestruct(address common.Address) { - access := c.accessesStack[len(c.accessesStack)-1][address] + scope := c.currentScope() + access := scope[address] if len(access.storageMutations) != 0 && access.storageReads == nil { access.storageReads = make(map[common.Hash]struct{}) } @@ -144,16 +157,19 @@ func (c *idxAccessListBuilder) nonceChange(address common.Address, prev, cur uin if c.prestates[address].nonce == nil { c.prestates[address].nonce = &prev } - if _, ok := c.accessesStack[len(c.accessesStack)-1][address]; !ok { - c.accessesStack[len(c.accessesStack)-1][address] = &constructionAccountAccess{} + scope := c.currentScope() + if _, ok := scope[address]; !ok { + scope[address] = &constructionAccountAccess{} } - acctAccesses := c.accessesStack[len(c.accessesStack)-1][address] - acctAccesses.NonceChange(cur) + scope[address].NonceChange(cur) } // enterScope is called after a new EVM call frame has been entered. +// The scope map is lazily allocated by currentScope() only when a state +// change occurs, avoiding heap allocations for precompile calls that +// don't touch state. func (c *idxAccessListBuilder) enterScope() { - c.accessesStack = append(c.accessesStack, make(map[common.Address]*constructionAccountAccess)) + c.accessesStack = append(c.accessesStack, nil) } // exitScope is called after an EVM call scope terminates. If the call scope @@ -162,8 +178,14 @@ func (c *idxAccessListBuilder) enterScope() { // * mutated accounts/storage are added into the calling scope's access list as state accesses func (c *idxAccessListBuilder) exitScope(evmErr bool) { childAccessList := c.accessesStack[len(c.accessesStack)-1] - parentAccessList := c.accessesStack[len(c.accessesStack)-2] + c.accessesStack = c.accessesStack[:len(c.accessesStack)-1] + // If no state was accessed in this scope, nothing to merge. 
+ if childAccessList == nil { + return + } + + parentAccessList := c.currentScope() for addr, childAccess := range childAccessList { if _, ok := parentAccessList[addr]; ok { } else { @@ -177,8 +199,6 @@ func (c *idxAccessListBuilder) exitScope(evmErr bool) { parentAccessList[addr].Merge(childAccess) } } - - c.accessesStack = c.accessesStack[:len(c.accessesStack)-1] } // finalise returns the net state mutations at the access list index as well as @@ -188,6 +208,11 @@ func (a *idxAccessListBuilder) finalise() (*StateDiff, StateAccesses) { diff := &StateDiff{make(map[common.Address]*AccountMutations)} stateAccesses := make(StateAccesses) + // Root scope may be nil if no state changes occurred at all. + if a.accessesStack[0] == nil { + return diff, stateAccesses + } + for addr, access := range a.accessesStack[0] { // remove any reported mutations from the access list with no net difference vs the index prestate value if access.nonce != nil && *a.prestates[addr].nonce == *access.nonce { diff --git a/core/types/bal/bal_test.go b/core/types/bal/bal_test.go index a0538c2b95..726ea48b58 100644 --- a/core/types/bal/bal_test.go +++ b/core/types/bal/bal_test.go @@ -19,6 +19,7 @@ package bal import ( "bytes" "cmp" + "fmt" "reflect" "slices" "testing" @@ -257,5 +258,117 @@ func TestBlockAccessListValidation(t *testing.T) { } } +// TestLazyScopeCorrectness verifies that lazy scope allocation produces +// identical results to the previous eager allocation for mixed workloads: +// precompile calls (empty scopes) interspersed with state-changing calls. 
+func TestLazyScopeCorrectness(t *testing.T) { + builder := newAccessListBuilder() + sender := common.HexToAddress("0x1234") + contract := common.HexToAddress("0x5678") + precompile := common.HexToAddress("0x06") + + // Tx-level: sender balance/nonce + builder.balanceChange(sender, uint256.NewInt(1000), uint256.NewInt(900)) + builder.nonceChange(sender, 0, 1) + + // Enter contract scope + builder.enterScope() + builder.storageRead(contract, common.HexToHash("0x01")) + + // Precompile STATICCALL (empty scope) + builder.enterScope() + builder.exitScope(false) + + // Another precompile call + builder.enterScope() + builder.exitScope(false) + + // Contract writes storage + builder.storageWrite(contract, common.HexToHash("0x02"), common.Hash{}, common.HexToHash("0xff")) + + // Precompile that reverts (still empty scope, reverted) + builder.enterScope() + builder.exitScope(true) + + // Nested call to another contract + builder.enterScope() + builder.balanceChange(precompile, uint256.NewInt(0), uint256.NewInt(100)) + builder.exitScope(false) + + // Exit contract scope + builder.exitScope(false) + + diff, accesses := builder.finalise() + + // Verify sender mutations + senderMut, ok := diff.Mutations[sender] + if !ok { + t.Fatal("sender not in mutations") + } + if senderMut.Balance == nil || !senderMut.Balance.Eq(uint256.NewInt(900)) { + t.Fatalf("sender balance mismatch: got %v", senderMut.Balance) + } + if senderMut.Nonce == nil || *senderMut.Nonce != 1 { + t.Fatalf("sender nonce mismatch: got %v", senderMut.Nonce) + } + + // Verify contract mutations (storage write) + contractMut, ok := diff.Mutations[contract] + if !ok { + t.Fatal("contract not in mutations") + } + if contractMut.StorageWrites == nil { + t.Fatal("contract has no storage writes") + } + if contractMut.StorageWrites[common.HexToHash("0x02")] != common.HexToHash("0xff") { + t.Fatal("contract storage write mismatch") + } + + // Verify precompile balance change + precompileMut, ok := 
diff.Mutations[precompile] + if !ok { + t.Fatal("precompile not in mutations") + } + if precompileMut.Balance == nil || !precompileMut.Balance.Eq(uint256.NewInt(100)) { + t.Fatalf("precompile balance mismatch: got %v", precompileMut.Balance) + } + + // Verify contract storage read is in accesses + contractAccesses, ok := accesses[contract] + if !ok { + t.Fatal("contract not in accesses") + } + if _, ok := contractAccesses[common.HexToHash("0x01")]; !ok { + t.Fatal("contract storage read not in accesses") + } +} + +// BenchmarkPrecompileScopes simulates a precompile-heavy transaction where +// STATICCALL is invoked thousands of times against a precompile (e.g. bn128_add). +// Each call creates a scope (EnterScope) that records no state changes (precompiles +// don't touch state), then exits (ExitScope). This benchmark measures the overhead +// of scope tracking for such workloads. +func BenchmarkPrecompileScopes(b *testing.B) { + for _, numCalls := range []int{100, 1000, 10000, 100000} { + b.Run(fmt.Sprintf("calls=%d", numCalls), func(b *testing.B) { + for i := 0; i < b.N; i++ { + builder := newAccessListBuilder() + // Simulate a transaction: sender balance/nonce change at depth 0, + // then thousands of precompile STATICCALL scopes that touch no state. + sender := common.HexToAddress("0x1234") + builder.balanceChange(sender, uint256.NewInt(1000), uint256.NewInt(900)) + builder.nonceChange(sender, 0, 1) + + for j := 0; j < numCalls; j++ { + builder.enterScope() + // Precompile call: no state hooks fire + builder.exitScope(false) + } + builder.finalise() + } + }) + } +} + // BALReader test ideas // * BAL which doesn't have any pre-tx system contracts should return an empty state diff at idx 0