diff --git a/core/types/block.go b/core/types/block.go index da9614793a..b5b6468a13 100644 --- a/core/types/block.go +++ b/core/types/block.go @@ -240,7 +240,7 @@ type extblock struct { // // The receipt's bloom must already calculated for the block's bloom to be // correctly calculated. -func NewBlock(header *Header, body *Body, receipts []*Receipt, hasher TrieHasher) *Block { +func NewBlock(header *Header, body *Body, receipts []*Receipt, hasher ListHasher) *Block { if body == nil { body = &Body{} } diff --git a/core/types/hashing.go b/core/types/hashing.go index 3cc22d50d1..98fe64e15a 100644 --- a/core/types/hashing.go +++ b/core/types/hashing.go @@ -27,7 +27,7 @@ import ( "github.com/ethereum/go-ethereum/rlp" ) -// hasherPool holds LegacyKeccak256 hashers for rlpHash. +// hasherPool holds reusable LegacyKeccak256 hashers for rlpHash. var hasherPool = sync.Pool{ New: func() interface{} { return crypto.NewKeccakState() }, } @@ -75,11 +75,17 @@ func prefixedRlpHash(prefix byte, x interface{}) (h common.Hash) { return h } -// TrieHasher is the tool used to calculate the hash of derivable list. -// This is internal, do not use. -type TrieHasher interface { +// ListHasher defines the interface for computing the hash of a derivable list. +type ListHasher interface { + // Reset clears the internal state of the hasher, preparing it for reuse. Reset() - Update([]byte, []byte) error + + // Update inserts the given key-value pair into the hasher. + // The implementation must copy the provided slices, allowing the caller + // to safely modify them after the call returns. + Update(key []byte, value []byte) error + + // Hash computes and returns the final hash of all inserted key-value pairs. Hash() common.Hash } @@ -91,19 +97,20 @@ type DerivableList interface { EncodeIndex(int, *bytes.Buffer) } +// encodeForDerive encodes the element in the list at the position i into the buffer. 
func encodeForDerive(list DerivableList, i int, buf *bytes.Buffer) []byte { buf.Reset() list.EncodeIndex(i, buf) - // It's really unfortunate that we need to perform this copy. - // StackTrie holds onto the values until Hash is called, so the values - // written to it must not alias. - return common.CopyBytes(buf.Bytes()) + return buf.Bytes() } // DeriveSha creates the tree hashes of transactions, receipts, and withdrawals in a block header. -func DeriveSha(list DerivableList, hasher TrieHasher) common.Hash { +func DeriveSha(list DerivableList, hasher ListHasher) common.Hash { hasher.Reset() + // Allocate a buffer for value encoding. The hasher is required to copy all + // supplied key-value pairs, so it is safe to reuse the encoding buffer + // between iterations. valueBuf := encodeBufferPool.Get().(*bytes.Buffer) defer encodeBufferPool.Put(valueBuf) diff --git a/core/types/hashing_test.go b/core/types/hashing_test.go index 54adbc73e8..a7153bf09a 100644 --- a/core/types/hashing_test.go +++ b/core/types/hashing_test.go @@ -26,12 +26,10 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common/hexutil" - "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" - "github.com/ethereum/go-ethereum/triedb" ) func TestDeriveSha(t *testing.T) { @@ -40,7 +38,7 @@ func TestDeriveSha(t *testing.T) { t.Fatal(err) } for len(txs) < 1000 { - exp := types.DeriveSha(txs, trie.NewEmpty(triedb.NewDatabase(rawdb.NewMemoryDatabase(), nil))) + exp := types.DeriveSha(txs, trie.NewListHasher()) got := types.DeriveSha(txs, trie.NewStackTrie(nil)) if !bytes.Equal(got[:], exp[:]) { t.Fatalf("%d txs: got %x exp %x", len(txs), got, exp) } @@ -76,30 +74,45 @@ func TestEIP2718DeriveSha(t *testing.T) { } } +// goos: darwin +// goarch: arm64 +// pkg: github.com/ethereum/go-ethereum/core/types +// cpu: Apple M1 Pro 
+// BenchmarkDeriveSha200 +// BenchmarkDeriveSha200/std_trie +// BenchmarkDeriveSha200/std_trie-8 6754 174074 ns/op 80054 B/op 1926 allocs/op +// BenchmarkDeriveSha200/stack_trie +// BenchmarkDeriveSha200/stack_trie-8 7296 162675 ns/op 745 B/op 19 allocs/op func BenchmarkDeriveSha200(b *testing.B) { txs, err := genTxs(200) if err != nil { b.Fatal(err) } - var exp common.Hash - var got common.Hash + want := types.DeriveSha(txs, trie.NewListHasher()) + b.Run("std_trie", func(b *testing.B) { b.ReportAllocs() + var have common.Hash for b.Loop() { - exp = types.DeriveSha(txs, trie.NewEmpty(triedb.NewDatabase(rawdb.NewMemoryDatabase(), nil))) + have = types.DeriveSha(txs, trie.NewListHasher()) + } + if have != want { + b.Errorf("have %x want %x", have, want) } }) + st := trie.NewStackTrie(nil) b.Run("stack_trie", func(b *testing.B) { - b.ResetTimer() b.ReportAllocs() + var have common.Hash for b.Loop() { - got = types.DeriveSha(txs, trie.NewStackTrie(nil)) + st.Reset() + have = types.DeriveSha(txs, st) + } + if have != want { + b.Errorf("have %x want %x", have, want) } }) - if got != exp { - b.Errorf("got %x exp %x", got, exp) - } } func TestFuzzDeriveSha(t *testing.T) { @@ -107,7 +120,7 @@ func TestFuzzDeriveSha(t *testing.T) { rndSeed := mrand.Int() for i := 0; i < 10; i++ { seed := rndSeed + i - exp := types.DeriveSha(newDummy(i), trie.NewEmpty(triedb.NewDatabase(rawdb.NewMemoryDatabase(), nil))) + exp := types.DeriveSha(newDummy(i), trie.NewListHasher()) got := types.DeriveSha(newDummy(i), trie.NewStackTrie(nil)) if !bytes.Equal(got[:], exp[:]) { printList(t, newDummy(seed)) @@ -135,7 +148,7 @@ func TestDerivableList(t *testing.T) { }, } for i, tc := range tcs[1:] { - exp := types.DeriveSha(flatList(tc), trie.NewEmpty(triedb.NewDatabase(rawdb.NewMemoryDatabase(), nil))) + exp := types.DeriveSha(flatList(tc), trie.NewListHasher()) got := types.DeriveSha(flatList(tc), trie.NewStackTrie(nil)) if !bytes.Equal(got[:], exp[:]) { t.Fatalf("case %d: got %x exp %x", i, got, 
exp) diff --git a/internal/blocktest/test_hash.go b/internal/blocktest/test_hash.go index 4d2b077e89..b3e7098e2b 100644 --- a/internal/blocktest/test_hash.go +++ b/internal/blocktest/test_hash.go @@ -23,6 +23,7 @@ package blocktest import ( + "bytes" "hash" "github.com/ethereum/go-ethereum/common" @@ -48,8 +49,8 @@ func (h *testHasher) Reset() { // Update updates the hash state with the given key and value. func (h *testHasher) Update(key, val []byte) error { - h.hasher.Write(key) - h.hasher.Write(val) + h.hasher.Write(bytes.Clone(key)) + h.hasher.Write(bytes.Clone(val)) return nil } diff --git a/trie/bytepool.go b/trie/bytepool.go index 4f9c5672fd..31be7ae749 100644 --- a/trie/bytepool.go +++ b/trie/bytepool.go @@ -32,8 +32,8 @@ func newBytesPool(sliceCap, nitems int) *bytesPool { } } -// Get returns a slice. Safe for concurrent use. -func (bp *bytesPool) Get() []byte { +// get returns a slice. Safe for concurrent use. +func (bp *bytesPool) get() []byte { select { case b := <-bp.c: return b @@ -42,18 +42,18 @@ func (bp *bytesPool) Get() []byte { } } -// GetWithSize returns a slice with specified byte slice size. -func (bp *bytesPool) GetWithSize(s int) []byte { - b := bp.Get() +// getWithSize returns a slice with specified byte slice size. +func (bp *bytesPool) getWithSize(s int) []byte { + b := bp.get() if cap(b) < s { return make([]byte, s) } return b[:s] } -// Put returns a slice to the pool. Safe for concurrent use. This method +// put returns a slice to the pool. Safe for concurrent use. This method // will ignore slices that are too small or too large (>3x the cap) -func (bp *bytesPool) Put(b []byte) { +func (bp *bytesPool) put(b []byte) { if c := cap(b); c < bp.w || c > 3*bp.w { return } @@ -62,3 +62,40 @@ func (bp *bytesPool) Put(b []byte) { default: } } + +// unsafeBytesPool is a pool for byte slices. It is not safe for concurrent use. +type unsafeBytesPool struct { + items [][]byte + w int +} + +// newUnsafeBytesPool creates a new unsafeBytesPool. 
The sliceCap sets the +// capacity of newly allocated slices, and the nitems determines how many +// items the pool will hold, at maximum. +func newUnsafeBytesPool(sliceCap, nitems int) *unsafeBytesPool { + return &unsafeBytesPool{ + items: make([][]byte, 0, nitems), + w: sliceCap, + } +} + +// get returns a slice with pre-allocated space. +func (bp *unsafeBytesPool) get() []byte { + if len(bp.items) > 0 { + last := bp.items[len(bp.items)-1] + bp.items = bp.items[:len(bp.items)-1] + return last + } + return make([]byte, 0, bp.w) +} + +// put returns a slice to the pool. This method will ignore slices that are +// too small or too large (>3x the cap) +func (bp *unsafeBytesPool) put(b []byte) { + if c := cap(b); c < bp.w || c > 3*bp.w { + return + } + if len(bp.items) < cap(bp.items) { + bp.items = append(bp.items, b) + } +} diff --git a/trie/list_hasher.go b/trie/list_hasher.go new file mode 100644 index 0000000000..8f334f9901 --- /dev/null +++ b/trie/list_hasher.go @@ -0,0 +1,56 @@ +// Copyright 2025 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package trie + +import ( + "bytes" + + "github.com/ethereum/go-ethereum/common" +) + +// ListHasher is a wrapper of the Merkle-Patricia-Trie, which implements +// types.ListHasher. 
Compared to a Trie instance, the Update method of this +// type always deep-copies its input slices. +// +// This implementation is very inefficient in terms of memory allocation, +// compared with StackTrie. It exists only for correctness comparison purposes. +type ListHasher struct { + tr *Trie +} + +// NewListHasher initializes the list hasher. +func NewListHasher() *ListHasher { + return &ListHasher{ + tr: NewEmpty(nil), + } +} + +// Reset clears the internal state and prepares the ListHasher for reuse. +func (h *ListHasher) Reset() { + h.tr.reset() +} + +// Update inserts a key-value pair into the trie. +func (h *ListHasher) Update(key []byte, value []byte) error { + key, value = bytes.Clone(key), bytes.Clone(value) + return h.tr.Update(key, value) +} + +// Hash computes the root hash of all inserted key-value pairs. +func (h *ListHasher) Hash() common.Hash { + return h.tr.Hash() +} diff --git a/trie/stacktrie.go b/trie/stacktrie.go index 2b7366c3c5..18fe1eea78 100644 --- a/trie/stacktrie.go +++ b/trie/stacktrie.go @@ -28,7 +28,7 @@ import ( var ( stPool = sync.Pool{New: func() any { return new(stNode) }} bPool = newBytesPool(32, 100) - _ = types.TrieHasher((*StackTrie)(nil)) + _ = types.ListHasher((*StackTrie)(nil)) ) // OnTrieNode is a callback method invoked when a trie node is committed @@ -50,6 +50,7 @@ type StackTrie struct { onTrieNode OnTrieNode kBuf []byte // buf space used for hex-key during insertions pBuf []byte // buf space used for path during insertions + vPool *unsafeBytesPool } // NewStackTrie allocates and initializes an empty trie. The committed nodes @@ -61,6 +62,7 @@ func NewStackTrie(onTrieNode OnTrieNode) *StackTrie { onTrieNode: onTrieNode, kBuf: make([]byte, 64), pBuf: make([]byte, 64), + vPool: newUnsafeBytesPool(300, 20), } } @@ -74,6 +76,9 @@ func (t *StackTrie) grow(key []byte) { } // Update inserts a (key, value) pair into the stack trie. 
+// +// Note the supplied key value pair is copied and managed internally, +// they are safe to be modified after this method returns. func (t *StackTrie) Update(key, value []byte) error { if len(value) == 0 { return errors.New("trying to insert empty (deletion)") @@ -88,7 +93,14 @@ func (t *StackTrie) Update(key, value []byte) error { } else { t.last = append(t.last[:0], k...) // reuse key slice } - t.insert(t.root, k, value, t.pBuf[:0]) + vBuf := t.vPool.get() + if cap(vBuf) < len(value) { + vBuf = common.CopyBytes(value) + } else { + vBuf = vBuf[:len(value)] + copy(vBuf, value) + } + t.insert(t.root, k, vBuf, t.pBuf[:0]) return nil } @@ -108,14 +120,16 @@ func (t *StackTrie) TrieKey(key []byte) []byte { // stNode represents a node within a StackTrie type stNode struct { typ uint8 // node type (as in branch, ext, leaf) - key []byte // key chunk covered by this (leaf|ext) node - val []byte // value contained by this node if it's a leaf - children [16]*stNode // list of children (for branch and exts) + key []byte // exclusive owned key chunk covered by this (leaf|ext) node + val []byte // exclusive owned value contained by this node (leaf: value; hash: hash) + children [16]*stNode // list of children (for branch and ext) } -// newLeaf constructs a leaf node with provided node key and value. The key -// will be deep-copied in the function and safe to modify afterwards, but -// value is not. +// newLeaf constructs a leaf node with provided node key and value. +// +// The key is deep-copied within the function, so it can be safely modified +// afterwards. The value is retained directly without copying, as it is +// exclusively owned by the stackTrie. func newLeaf(key, val []byte) *stNode { st := stPool.Get().(*stNode) st.typ = leafNode @@ -146,9 +160,9 @@ const ( func (n *stNode) reset() *stNode { if n.typ == hashedNode { // On hashnodes, we 'own' the val: it is guaranteed to be not held - // by external caller. 
Hence, when we arrive here, we can put it back - // into the pool - bPool.Put(n.val) + // by external caller. Hence, when we arrive here, we can put it + // back into the pool + bPool.put(n.val) } n.key = n.key[:0] n.val = nil @@ -172,11 +186,6 @@ func (n *stNode) getDiffIndex(key []byte) int { } // Helper function to that inserts a (key, value) pair into the trie. -// -// - The key is not retained by this method, but always copied if needed. -// - The value is retained by this method, as long as the leaf that it represents -// remains unhashed. However: it is never modified. -// - The path is not retained by this method. func (t *StackTrie) insert(st *stNode, key, value []byte, path []byte) { switch st.typ { case branchNode: /* Branch */ @@ -235,16 +244,14 @@ func (t *StackTrie) insert(st *stNode, key, value []byte, path []byte) { } var p *stNode if diffidx == 0 { - // the break is on the first byte, so - // the current node is converted into - // a branch node. + // the break is on the first byte, so the current node + // is converted into a branch node. st.children[0] = nil - p = st st.typ = branchNode + p = st } else { - // the common prefix is at least one byte - // long, insert a new intermediate branch - // node. + // the common prefix is at least one byte long, insert + // a new intermediate branch node. st.children[0] = stPool.Get().(*stNode) st.children[0].typ = branchNode p = st.children[0] @@ -280,8 +287,8 @@ func (t *StackTrie) insert(st *stNode, key, value []byte, path []byte) { if diffidx == 0 { // Convert current leaf into a branch st.typ = branchNode - p = st st.children[0] = nil + p = st } else { // Convert current node into an ext, // and insert a child branch node. @@ -307,9 +314,7 @@ func (t *StackTrie) insert(st *stNode, key, value []byte, path []byte) { st.val = nil case emptyNode: /* Empty */ - st.typ = leafNode - st.key = append(st.key, key...) 
// deep-copy the key as it's volatile - st.val = value + *st = *newLeaf(key, value) case hashedNode: panic("trying to insert into hash") @@ -393,18 +398,23 @@ func (t *StackTrie) hash(st *stNode, path []byte) { st.typ = hashedNode st.key = st.key[:0] - st.val = nil // Release reference to potentially externally held slice. + // Release reference to value slice which is exclusively owned + // by stackTrie itself. + if cap(st.val) > 0 && t.vPool != nil { + t.vPool.put(st.val) + } + st.val = nil // Skip committing the non-root node if the size is smaller than 32 bytes // as tiny nodes are always embedded in their parent except root node. if len(blob) < 32 && len(path) > 0 { - st.val = bPool.GetWithSize(len(blob)) + st.val = bPool.getWithSize(len(blob)) copy(st.val, blob) return } // Write the hash to the 'val'. We allocate a new val here to not mutate // input values. - st.val = bPool.GetWithSize(32) + st.val = bPool.getWithSize(32) t.h.hashDataTo(st.val, blob) // Invoke the callback it's provided. Notably, the path and blob slices are diff --git a/trie/trie.go b/trie/trie.go index 36cc732ee8..1ef2c2f1a6 100644 --- a/trie/trie.go +++ b/trie/trie.go @@ -784,8 +784,8 @@ func (t *Trie) Witness() map[string][]byte { return t.prevalueTracer.Values() } -// Reset drops the referenced root node and cleans all internal state. -func (t *Trie) Reset() { +// reset drops the referenced root node and cleans all internal state. +func (t *Trie) reset() { t.root = nil t.owner = common.Hash{} t.unhashed = 0