mirror of
https://github.com/ethereum/go-ethereum.git
synced 2026-05-09 17:46:37 +00:00
## Summary

Replace the `BinaryNode` interface with `NodeRef uint32` indices into typed arena pools, eliminating GC-scanned pointers from binary trie nodes.

Inspired by [fjl's observation](https://github.com/ethereum/go-ethereum/pull/34034#issuecomment-4075176446):

> *"if the binary trie produces such a large graph, it should probably be changed so that the trie node type does not contain pointers. The runtime does not scan objects that do not contain pointers, so it can really help with the performance to build it this way."*

### The problem

CPU profiling of the binary trie (EIP-7864) showed **44% of CPU time in garbage collection**. Each `InternalNode` held two `BinaryNode` interface values (2 pointer-words each), and the GC scanned every one. With ~25K `InternalNode`s in memory during block processing, this created enormous GC pressure.

### The solution

`NodeRef` is a compact `uint32` (2-bit kind tag + 30-bit pool index). `NodeStore` manages chunked typed pools per node kind:

- **InternalNode pool**: ZERO Go pointers (children are `NodeRef`, hash is `[32]byte`) → noscan spans
- **HashedNode pool**: ZERO Go pointers → noscan spans
- **StemNode pool**: retains `Values [][]byte` (matching existing format)

The serialization format is unchanged — flat InternalNode `[type][leftHash][rightHash]` = 65 bytes.
## Benchmark: Apple M4 Pro (`--benchtime=10s --count=3`, on top of #34021)

| Metric | Baseline | Arena | Delta |
|--------|----------|-------|-------|
| Approve (Mgas/s) | 374 | 382 | **+2.1%** |
| BalanceOf (Mgas/s) | 885 | 901 | **+1.8%** |
| Approve allocs/op | 775K | **607K** | **-21.7%** |
| BalanceOf allocs/op | 265K | **228K** | **-14.0%** |

## Benchmark: AMD EPYC 48-core (50GB state, execution-specs ERC-20, on top of #34021 + #34032)

| Benchmark | Baseline | Arena | Delta |
|-----------|----------|-------|-------|
| erc20_approve (write) | 22.4 Mgas/s | **27.0 Mgas/s** | **+20.5%** |
| mixed_sload_sstore | 62.9 Mgas/s | **97.3 Mgas/s** | **+54.7%** |
| erc20_balanceof (read) | 180.8 Mgas/s | 167.6 Mgas/s | -7.3% (cold cache variance) |

The arena benefit scales with heap size — the EPYC (larger heap, more GC pressure) shows much larger gains than the M4 Pro (efficient unified memory). The mixed workload baseline was unstable (62.9 vs 16.3 Mgas/s between runs due to GC-induced throughput collapse); the arena eliminates this entirely (95-97 Mgas/s, stable).

## Dependencies

Benchmarked with #34021 (H01 N+1 fix) + #34032 (R14 parallel hashing). No code dependency — applies independently to master. All test suites pass (`trie/bintrie` with `-race`, `core/state`, `triedb/pathdb`, `cmd/geth`).

---------

Co-authored-by: Guillaume Ballet <3272758+gballet@users.noreply.github.com>
109 lines
3.3 KiB
Go
109 lines
3.3 KiB
Go
// Copyright 2025 go-ethereum Authors
|
|
// This file is part of the go-ethereum library.
|
|
//
|
|
// The go-ethereum library is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Lesser General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// The go-ethereum library is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Lesser General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
package bintrie
|
|
|
|
import (
|
|
"crypto/sha256"
|
|
|
|
"github.com/ethereum/go-ethereum/common"
|
|
)
|
|
|
|
// StemNode holds up to 256 values sharing a 31-byte stem.
|
|
//
|
|
// Invariant: dirty=false implies mustRecompute=false. Every mutation that
|
|
// invalidates the cached hash MUST also mark the blob for re-flush.
|
|
type StemNode struct {
|
|
Stem [StemSize]byte
|
|
values [StemNodeWidth][]byte // nil == slot absent
|
|
|
|
depth uint8
|
|
|
|
mustRecompute bool // hash is stale (cleared by Hash)
|
|
dirty bool // on-disk blob is stale (cleared by CollectNodes)
|
|
hash common.Hash // cached hash when mustRecompute == false
|
|
}
|
|
|
|
func (sn *StemNode) getValue(suffix byte) []byte {
|
|
return sn.values[suffix]
|
|
}
|
|
|
|
func (sn *StemNode) hasValue(suffix byte) bool {
|
|
return sn.values[suffix] != nil
|
|
}
|
|
|
|
// allValues returns the underlying slot array as a slice. nil entries mean
|
|
// absent. Callers must treat it as read-only.
|
|
func (sn *StemNode) allValues() [][]byte {
|
|
return sn.values[:]
|
|
}
|
|
|
|
// setValue mutates a value slot and marks the stem for re-hash and
|
|
// re-flush. This is the only API for post-load value mutation; direct
|
|
// values[...] writes are reserved for the on-disk load path in
|
|
// decodeNode, which must leave mustRecompute/dirty at their loaded
|
|
// state.
|
|
func (sn *StemNode) setValue(suffix byte, value []byte) {
|
|
sn.values[suffix] = value
|
|
sn.mustRecompute = true
|
|
sn.dirty = true
|
|
}
|
|
|
|
func (sn *StemNode) Hash() common.Hash {
|
|
if !sn.mustRecompute {
|
|
return sn.hash
|
|
}
|
|
|
|
// Use sha256.Sum256 (returns [32]byte by value) instead of a pooled
|
|
// hash.Hash: feeding data[i][:0] into the interface method Sum forces
|
|
// data to heap (escape analysis is conservative through interfaces).
|
|
// Sum256 takes []byte and returns by value, so data stays on stack.
|
|
var data [StemNodeWidth]common.Hash
|
|
|
|
for i, v := range sn.values {
|
|
if v != nil {
|
|
data[i] = sha256.Sum256(v)
|
|
}
|
|
}
|
|
|
|
var pair [2 * HashSize]byte
|
|
for level := 1; level <= 8; level++ {
|
|
for i := range StemNodeWidth / (1 << level) {
|
|
if data[i*2] == (common.Hash{}) && data[i*2+1] == (common.Hash{}) {
|
|
data[i] = common.Hash{}
|
|
continue
|
|
}
|
|
copy(pair[:HashSize], data[i*2][:])
|
|
copy(pair[HashSize:], data[i*2+1][:])
|
|
data[i] = sha256.Sum256(pair[:])
|
|
}
|
|
}
|
|
|
|
var final [StemSize + 1 + HashSize]byte
|
|
copy(final[:StemSize], sn.Stem[:])
|
|
final[StemSize] = 0
|
|
copy(final[StemSize+1:], data[0][:])
|
|
sn.hash = sha256.Sum256(final[:])
|
|
sn.mustRecompute = false
|
|
return sn.hash
|
|
}
|
|
|
|
func (sn *StemNode) Key(i int) []byte {
|
|
var ret [HashSize]byte
|
|
copy(ret[:], sn.Stem[:])
|
|
ret[StemSize] = byte(i)
|
|
return ret[:]
|
|
}
|