eth/fetcher: lazy-allocate unknown slices in TxFetcher.Notify

TxFetcher.Notify is the entry point for every NewPooledTransactionHashes
message: for a well-connected node with dozens of peers it fires
thousands of times per second. On a warm mempool nearly every announced
hash is already known, because some earlier peer already pushed it, so
the "unknown" filter reduces the batch to nothing.

The function still paid `2 * make([]T, 0, len(hashes))` upfront on every
call, wasting ~32B * len(hashes) per slice × 2 slices (common.Hash is
32B, txMetadata is 2B but aligned). At 256-hash announcements that is
10 KiB of allocator pressure per call for nothing to show for it.

Mirror the lazy-allocation pattern applied to scheduleFetches in
2ca74d2ef ("eth/fetcher: lazy-allocate hashes slice in
scheduleFetches"): defer the allocation to the first append, and size
to `len(hashes)-i` so the capacity is right-sized when a late-arriving
fresh hash is the only one we keep.

Benchmark (Apple M4 Pro, 256-hash batch, benchstat of 3 samples):

  scenario      ns/op        B/op           allocs/op
  AllKnown      2670 → 1915  10240 → 0      2 → 0        (-28% / -100% / -100%)
  HalfNew       4873 → 4968  10304 → 10304  3 → 3        (noise)
  AllNew        5932 → 5982  10304 → 10304  3 → 3        (noise)

AllKnown is the steady-state case. HalfNew and AllNew guard against
regressing the cold path; both stay within measurement noise.
This commit is contained in:
rayoo 2026-05-07 12:26:10 +08:00
parent ea1cf7bf5e
commit 162cd83ee0
2 changed files with 124 additions and 2 deletions

View file

@ -241,9 +241,12 @@ func (f *TxFetcher) Notify(peer string, types []byte, sizes []uint32, hashes []c
// because multiple concurrent notifies will still manage to pass it, but it's
// still valuable to check here because it runs concurrent to the internal
// loop, so anything caught here is time saved internally.
// unknownHashes and unknownMetas are allocated lazily: announcements
// where every hash is already known (the steady-state case once a tx
// has been gossiped to us once) skip the 32B*len(hashes) allocation.
var (
unknownHashes = make([]common.Hash, 0, len(hashes))
unknownMetas = make([]txMetadata, 0, len(hashes))
unknownHashes []common.Hash
unknownMetas []txMetadata
duplicate int64
onchain int64
@ -270,6 +273,10 @@ func (f *TxFetcher) Notify(peer string, types []byte, sizes []uint32, hashes []c
continue
}
if unknownHashes == nil {
unknownHashes = make([]common.Hash, 0, len(hashes)-i)
unknownMetas = make([]txMetadata, 0, len(hashes)-i)
}
unknownHashes = append(unknownHashes, hash)
// Transaction metadata has been available since eth68, and all

View file

@ -0,0 +1,115 @@
// Copyright 2026 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package fetcher
import (
"testing"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/txpool"
gethtypes "github.com/ethereum/go-ethereum/core/types"
)
// benchmarkNotify measures the allocation cost of TxFetcher.Notify when each
// peer's announcement batch contains `unknown` fresh hashes followed by
// `known` duplicates (simulated by validateMeta returning ErrAlreadyKnown).
//
// The steady-state case on a warm node is unknown == 0: every hash has already
// been seen from some other peer. The pre-allocation of
// make([]common.Hash, 0, len(hashes)) + make([]txMetadata, 0, len(hashes))
// used to force ~2 * 32 * len(hashes) bytes of waste per call in that case.
func benchmarkNotify(b *testing.B, unknown, known int) {
b.Helper()
total := unknown + known
hashes := make([]common.Hash, total)
for i := range hashes {
// Bit-pattern hashes so the first `unknown` look fresh and the rest
// trigger the "already-known" fast path.
hashes[i][0] = byte(i & 0xff)
hashes[i][1] = byte(i >> 8)
if i >= unknown {
// Distinguish "known" hashes by a unique byte so we can keep a
// tiny set that validateMeta treats as already in the pool.
hashes[i][31] = 1
}
}
types := make([]byte, total)
for i := range types {
types[i] = 0x03 // BlobTx type, valid per validateMeta
}
sizes := make([]uint32, total)
for i := range sizes {
sizes[i] = 128
}
// validateMeta discriminates by the marker byte we embedded in each hash:
// trailing-byte == 1 means "already in the local pool".
validate := func(h common.Hash, _ byte) error {
if h[31] == 1 {
return txpool.ErrAlreadyKnown
}
return nil
}
fetcher := NewTxFetcher(
nil,
validate,
func(txs []*gethtypes.Transaction) []error { return make([]error, len(txs)) },
func(string, []common.Hash) error { return nil },
nil,
)
// Don't start the fetcher loop; Notify's fast path only hits if the
// internal select fires, but when there are zero unknowns we return early
// before touching the channel. For unknown > 0 we drop the announcement
// by draining the notify channel in a goroutine.
if unknown > 0 {
go func() {
for range fetcher.notify {
}
}()
}
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
// Use a distinct peer id each call so Notify can't short-circuit
// on duplicate peer state.
if err := fetcher.Notify("peer", types, sizes, hashes); err != nil {
b.Fatal(err)
}
}
}
// BenchmarkNotify_AllKnown is the hot steady-state case: every announced hash
// is already in the local pool. Pre-fix this paid 2 * 32 * len(hashes) bytes
// per call for slices that never received an append.
func BenchmarkNotify_AllKnown(b *testing.B) {
benchmarkNotify(b, 0, 256)
}
// BenchmarkNotify_HalfNew is a mixed case with 50% fresh hashes.
func BenchmarkNotify_HalfNew(b *testing.B) {
benchmarkNotify(b, 128, 128)
}
// BenchmarkNotify_AllNew is the worst case for the lazy allocation: every
// hash is fresh so the slice must be allocated anyway. This guards against
// regressing the common path.
func BenchmarkNotify_AllNew(b *testing.B) {
benchmarkNotify(b, 256, 0)
}