mirror of
https://github.com/ethereum/go-ethereum.git
synced 2026-05-20 14:59:26 +00:00
trie: reduce the memory allocation in trie hashing (#31902)
This pull request optimizes trie hashing by reducing memory allocation overhead. Specifically:
- Define a pool of fullnodeEncoder objects (fnEncoderPool) so that encoders are reused rather than allocated on every call.
- Simplify the encoding logic for shortNode and fullNode by passing raw encoded bytes instead of Go interface values.
This commit is contained in:
parent
d4a3bf1b23
commit
23da91f73b
7 changed files with 122 additions and 113 deletions
181
trie/hasher.go
181
trie/hasher.go
|
|
@ -17,6 +17,8 @@
|
||||||
package trie
|
package trie
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"github.com/ethereum/go-ethereum/crypto"
|
"github.com/ethereum/go-ethereum/crypto"
|
||||||
|
|
@ -54,7 +56,7 @@ func returnHasherToPool(h *hasher) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// hash collapses a node down into a hash node.
|
// hash collapses a node down into a hash node.
|
||||||
func (h *hasher) hash(n node, force bool) node {
|
func (h *hasher) hash(n node, force bool) []byte {
|
||||||
// Return the cached hash if it's available
|
// Return the cached hash if it's available
|
||||||
if hash, _ := n.cache(); hash != nil {
|
if hash, _ := n.cache(); hash != nil {
|
||||||
return hash
|
return hash
|
||||||
|
|
@ -62,101 +64,110 @@ func (h *hasher) hash(n node, force bool) node {
|
||||||
// Trie not processed yet, walk the children
|
// Trie not processed yet, walk the children
|
||||||
switch n := n.(type) {
|
switch n := n.(type) {
|
||||||
case *shortNode:
|
case *shortNode:
|
||||||
collapsed := h.hashShortNodeChildren(n)
|
enc := h.encodeShortNode(n)
|
||||||
hashed := h.shortnodeToHash(collapsed, force)
|
if len(enc) < 32 && !force {
|
||||||
if hn, ok := hashed.(hashNode); ok {
|
// Nodes smaller than 32 bytes are embedded directly in their parent.
|
||||||
n.flags.hash = hn
|
// In such cases, return the raw encoded blob instead of the node hash.
|
||||||
} else {
|
// It's essential to deep-copy the node blob, as the underlying buffer
|
||||||
n.flags.hash = nil
|
// of enc will be reused later.
|
||||||
|
buf := make([]byte, len(enc))
|
||||||
|
copy(buf, enc)
|
||||||
|
return buf
|
||||||
}
|
}
|
||||||
return hashed
|
hash := h.hashData(enc)
|
||||||
|
n.flags.hash = hash
|
||||||
|
return hash
|
||||||
|
|
||||||
case *fullNode:
|
case *fullNode:
|
||||||
collapsed := h.hashFullNodeChildren(n)
|
enc := h.encodeFullNode(n)
|
||||||
hashed := h.fullnodeToHash(collapsed, force)
|
if len(enc) < 32 && !force {
|
||||||
if hn, ok := hashed.(hashNode); ok {
|
// Nodes smaller than 32 bytes are embedded directly in their parent.
|
||||||
n.flags.hash = hn
|
// In such cases, return the raw encoded blob instead of the node hash.
|
||||||
} else {
|
// It's essential to deep-copy the node blob, as the underlying buffer
|
||||||
n.flags.hash = nil
|
// of enc will be reused later.
|
||||||
|
buf := make([]byte, len(enc))
|
||||||
|
copy(buf, enc)
|
||||||
|
return buf
|
||||||
}
|
}
|
||||||
return hashed
|
hash := h.hashData(enc)
|
||||||
default:
|
n.flags.hash = hash
|
||||||
// Value and hash nodes don't have children, so they're left as were
|
return hash
|
||||||
|
|
||||||
|
case hashNode:
|
||||||
|
// hash nodes don't have children, so they're returned as is
|
||||||
return n
|
return n
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// hashShortNodeChildren returns a copy of the supplied shortNode, with its child
|
|
||||||
// being replaced by either the hash or an embedded node if the child is small.
|
|
||||||
func (h *hasher) hashShortNodeChildren(n *shortNode) *shortNode {
|
|
||||||
var collapsed shortNode
|
|
||||||
collapsed.Key = hexToCompact(n.Key)
|
|
||||||
switch n.Val.(type) {
|
|
||||||
case *fullNode, *shortNode:
|
|
||||||
collapsed.Val = h.hash(n.Val, false)
|
|
||||||
default:
|
default:
|
||||||
collapsed.Val = n.Val
|
panic(fmt.Errorf("unexpected node type, %T", n))
|
||||||
}
|
}
|
||||||
return &collapsed
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// hashFullNodeChildren returns a copy of the supplied fullNode, with its child
|
// encodeShortNode encodes the provided shortNode into the bytes. Notably, the
|
||||||
// being replaced by either the hash or an embedded node if the child is small.
|
// returned slice must be deep-copied explicitly, otherwise the underlying slice
|
||||||
func (h *hasher) hashFullNodeChildren(n *fullNode) *fullNode {
|
// will be reused later.
|
||||||
var children [17]node
|
func (h *hasher) encodeShortNode(n *shortNode) []byte {
|
||||||
|
// Encode leaf node
|
||||||
|
if hasTerm(n.Key) {
|
||||||
|
var ln leafNodeEncoder
|
||||||
|
ln.Key = hexToCompact(n.Key)
|
||||||
|
ln.Val = n.Val.(valueNode)
|
||||||
|
ln.encode(h.encbuf)
|
||||||
|
return h.encodedBytes()
|
||||||
|
}
|
||||||
|
// Encode extension node
|
||||||
|
var en extNodeEncoder
|
||||||
|
en.Key = hexToCompact(n.Key)
|
||||||
|
en.Val = h.hash(n.Val, false)
|
||||||
|
en.encode(h.encbuf)
|
||||||
|
return h.encodedBytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
// fnEncoderPool is the pool for storing shared fullNode encoder to mitigate
|
||||||
|
// the significant memory allocation overhead.
|
||||||
|
var fnEncoderPool = sync.Pool{
|
||||||
|
New: func() interface{} {
|
||||||
|
var enc fullnodeEncoder
|
||||||
|
return &enc
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
// encodeFullNode encodes the provided fullNode into the bytes. Notably, the
|
||||||
|
// returned slice must be deep-copied explicitly, otherwise the underlying slice
|
||||||
|
// will be reused later.
|
||||||
|
func (h *hasher) encodeFullNode(n *fullNode) []byte {
|
||||||
|
fn := fnEncoderPool.Get().(*fullnodeEncoder)
|
||||||
|
fn.reset()
|
||||||
|
|
||||||
if h.parallel {
|
if h.parallel {
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
for i := 0; i < 16; i++ {
|
for i := 0; i < 16; i++ {
|
||||||
if child := n.Children[i]; child != nil {
|
if n.Children[i] == nil {
|
||||||
wg.Add(1)
|
continue
|
||||||
go func(i int) {
|
|
||||||
hasher := newHasher(false)
|
|
||||||
children[i] = hasher.hash(child, false)
|
|
||||||
returnHasherToPool(hasher)
|
|
||||||
wg.Done()
|
|
||||||
}(i)
|
|
||||||
} else {
|
|
||||||
children[i] = nilValueNode
|
|
||||||
}
|
}
|
||||||
|
wg.Add(1)
|
||||||
|
go func(i int) {
|
||||||
|
defer wg.Done()
|
||||||
|
|
||||||
|
h := newHasher(false)
|
||||||
|
fn.Children[i] = h.hash(n.Children[i], false)
|
||||||
|
returnHasherToPool(h)
|
||||||
|
}(i)
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
} else {
|
} else {
|
||||||
for i := 0; i < 16; i++ {
|
for i := 0; i < 16; i++ {
|
||||||
if child := n.Children[i]; child != nil {
|
if child := n.Children[i]; child != nil {
|
||||||
children[i] = h.hash(child, false)
|
fn.Children[i] = h.hash(child, false)
|
||||||
} else {
|
|
||||||
children[i] = nilValueNode
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if n.Children[16] != nil {
|
if n.Children[16] != nil {
|
||||||
children[16] = n.Children[16]
|
fn.Children[16] = n.Children[16].(valueNode)
|
||||||
}
|
}
|
||||||
return &fullNode{flags: nodeFlag{}, Children: children}
|
fn.encode(h.encbuf)
|
||||||
}
|
fnEncoderPool.Put(fn)
|
||||||
|
|
||||||
// shortNodeToHash computes the hash of the given shortNode. The shortNode must
|
return h.encodedBytes()
|
||||||
// first be collapsed, with its key converted to compact form. If the RLP-encoded
|
|
||||||
// node data is smaller than 32 bytes, the node itself is returned.
|
|
||||||
func (h *hasher) shortnodeToHash(n *shortNode, force bool) node {
|
|
||||||
n.encode(h.encbuf)
|
|
||||||
enc := h.encodedBytes()
|
|
||||||
|
|
||||||
if len(enc) < 32 && !force {
|
|
||||||
return n // Nodes smaller than 32 bytes are stored inside their parent
|
|
||||||
}
|
|
||||||
return h.hashData(enc)
|
|
||||||
}
|
|
||||||
|
|
||||||
// fullnodeToHash computes the hash of the given fullNode. If the RLP-encoded
|
|
||||||
// node data is smaller than 32 bytes, the node itself is returned.
|
|
||||||
func (h *hasher) fullnodeToHash(n *fullNode, force bool) node {
|
|
||||||
n.encode(h.encbuf)
|
|
||||||
enc := h.encodedBytes()
|
|
||||||
|
|
||||||
if len(enc) < 32 && !force {
|
|
||||||
return n // Nodes smaller than 32 bytes are stored inside their parent
|
|
||||||
}
|
|
||||||
return h.hashData(enc)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// encodedBytes returns the result of the last encoding operation on h.encbuf.
|
// encodedBytes returns the result of the last encoding operation on h.encbuf.
|
||||||
|
|
@ -175,9 +186,10 @@ func (h *hasher) encodedBytes() []byte {
|
||||||
return h.tmp
|
return h.tmp
|
||||||
}
|
}
|
||||||
|
|
||||||
// hashData hashes the provided data
|
// hashData hashes the provided data. It is safe to modify the returned slice after
|
||||||
func (h *hasher) hashData(data []byte) hashNode {
|
// the function returns.
|
||||||
n := make(hashNode, 32)
|
func (h *hasher) hashData(data []byte) []byte {
|
||||||
|
n := make([]byte, 32)
|
||||||
h.sha.Reset()
|
h.sha.Reset()
|
||||||
h.sha.Write(data)
|
h.sha.Write(data)
|
||||||
h.sha.Read(n)
|
h.sha.Read(n)
|
||||||
|
|
@ -192,20 +204,17 @@ func (h *hasher) hashDataTo(dst, data []byte) {
|
||||||
h.sha.Read(dst)
|
h.sha.Read(dst)
|
||||||
}
|
}
|
||||||
|
|
||||||
// proofHash is used to construct trie proofs, and returns the 'collapsed'
|
// proofHash is used to construct trie proofs, returning the rlp-encoded node blobs.
|
||||||
// node (for later RLP encoding) as well as the hashed node -- unless the
|
// Note, only resolved nodes (shortNode or fullNode) are expected when proving.
|
||||||
// node is smaller than 32 bytes, in which case it will be returned as is.
|
//
|
||||||
// This method does not do anything on value- or hash-nodes.
|
// It is safe to modify the returned slice after the function returns.
|
||||||
func (h *hasher) proofHash(original node) (collapsed, hashed node) {
|
func (h *hasher) proofHash(original node) []byte {
|
||||||
switch n := original.(type) {
|
switch n := original.(type) {
|
||||||
case *shortNode:
|
case *shortNode:
|
||||||
sn := h.hashShortNodeChildren(n)
|
return bytes.Clone(h.encodeShortNode(n))
|
||||||
return sn, h.shortnodeToHash(sn, false)
|
|
||||||
case *fullNode:
|
case *fullNode:
|
||||||
fn := h.hashFullNodeChildren(n)
|
return bytes.Clone(h.encodeFullNode(n))
|
||||||
return fn, h.fullnodeToHash(fn, false)
|
|
||||||
default:
|
default:
|
||||||
// Value and hash nodes don't have children, so they're left as were
|
panic(fmt.Errorf("unexpected node type, %T", original))
|
||||||
return n, n
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -240,9 +240,9 @@ func (it *nodeIterator) LeafProof() [][]byte {
|
||||||
|
|
||||||
for i, item := range it.stack[:len(it.stack)-1] {
|
for i, item := range it.stack[:len(it.stack)-1] {
|
||||||
// Gather nodes that end up as hash nodes (or the root)
|
// Gather nodes that end up as hash nodes (or the root)
|
||||||
node, hashed := hasher.proofHash(item.node)
|
enc := hasher.proofHash(item.node)
|
||||||
if _, ok := hashed.(hashNode); ok || i == 0 {
|
if len(enc) >= 32 || i == 0 {
|
||||||
proofs = append(proofs, nodeToBytes(node))
|
proofs = append(proofs, enc)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return proofs
|
return proofs
|
||||||
|
|
|
||||||
|
|
@ -68,10 +68,6 @@ type (
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
// nilValueNode is used when collapsing internal trie nodes for hashing, since
|
|
||||||
// unset children need to serialize correctly.
|
|
||||||
var nilValueNode = valueNode(nil)
|
|
||||||
|
|
||||||
// EncodeRLP encodes a full node into the consensus RLP format.
|
// EncodeRLP encodes a full node into the consensus RLP format.
|
||||||
func (n *fullNode) EncodeRLP(w io.Writer) error {
|
func (n *fullNode) EncodeRLP(w io.Writer) error {
|
||||||
eb := rlp.NewEncoderBuffer(w)
|
eb := rlp.NewEncoderBuffer(w)
|
||||||
|
|
|
||||||
|
|
@ -42,18 +42,29 @@ func (n *fullNode) encode(w rlp.EncoderBuffer) {
|
||||||
|
|
||||||
func (n *fullnodeEncoder) encode(w rlp.EncoderBuffer) {
|
func (n *fullnodeEncoder) encode(w rlp.EncoderBuffer) {
|
||||||
offset := w.List()
|
offset := w.List()
|
||||||
for _, c := range n.Children {
|
for i, c := range n.Children {
|
||||||
if c == nil {
|
if len(c) == 0 {
|
||||||
w.Write(rlp.EmptyString)
|
w.Write(rlp.EmptyString)
|
||||||
} else if len(c) < 32 {
|
|
||||||
w.Write(c) // rawNode
|
|
||||||
} else {
|
} else {
|
||||||
w.WriteBytes(c) // hashNode
|
// valueNode or hashNode
|
||||||
|
if i == 16 || len(c) >= 32 {
|
||||||
|
w.WriteBytes(c)
|
||||||
|
} else {
|
||||||
|
w.Write(c) // rawNode
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
w.ListEnd(offset)
|
w.ListEnd(offset)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (n *fullnodeEncoder) reset() {
|
||||||
|
for i, c := range n.Children {
|
||||||
|
if len(c) != 0 {
|
||||||
|
n.Children[i] = n.Children[i][:0]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (n *shortNode) encode(w rlp.EncoderBuffer) {
|
func (n *shortNode) encode(w rlp.EncoderBuffer) {
|
||||||
offset := w.List()
|
offset := w.List()
|
||||||
w.WriteBytes(n.Key)
|
w.WriteBytes(n.Key)
|
||||||
|
|
@ -70,7 +81,7 @@ func (n *extNodeEncoder) encode(w rlp.EncoderBuffer) {
|
||||||
w.WriteBytes(n.Key)
|
w.WriteBytes(n.Key)
|
||||||
|
|
||||||
if n.Val == nil {
|
if n.Val == nil {
|
||||||
w.Write(rlp.EmptyString)
|
w.Write(rlp.EmptyString) // theoretically impossible to happen
|
||||||
} else if len(n.Val) < 32 {
|
} else if len(n.Val) < 32 {
|
||||||
w.Write(n.Val) // rawNode
|
w.Write(n.Val) // rawNode
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,7 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
"github.com/ethereum/go-ethereum/common"
|
"github.com/ethereum/go-ethereum/common"
|
||||||
|
"github.com/ethereum/go-ethereum/crypto"
|
||||||
"github.com/ethereum/go-ethereum/ethdb"
|
"github.com/ethereum/go-ethereum/ethdb"
|
||||||
"github.com/ethereum/go-ethereum/log"
|
"github.com/ethereum/go-ethereum/log"
|
||||||
)
|
)
|
||||||
|
|
@ -85,16 +86,9 @@ func (t *Trie) Prove(key []byte, proofDb ethdb.KeyValueWriter) error {
|
||||||
defer returnHasherToPool(hasher)
|
defer returnHasherToPool(hasher)
|
||||||
|
|
||||||
for i, n := range nodes {
|
for i, n := range nodes {
|
||||||
var hn node
|
enc := hasher.proofHash(n)
|
||||||
n, hn = hasher.proofHash(n)
|
if len(enc) >= 32 || i == 0 {
|
||||||
if hash, ok := hn.(hashNode); ok || i == 0 {
|
proofDb.Put(crypto.Keccak256(enc), enc)
|
||||||
// If the node's database encoding is a hash (or is the
|
|
||||||
// root node), it becomes a proof element.
|
|
||||||
enc := nodeToBytes(n)
|
|
||||||
if !ok {
|
|
||||||
hash = hasher.hashData(enc)
|
|
||||||
}
|
|
||||||
proofDb.Put(hash, enc)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
|
|
|
||||||
|
|
@ -626,7 +626,7 @@ func (t *Trie) resolveAndTrack(n hashNode, prefix []byte) (node, error) {
|
||||||
// Hash returns the root hash of the trie. It does not write to the
|
// Hash returns the root hash of the trie. It does not write to the
|
||||||
// database and can be used even if the trie doesn't have one.
|
// database and can be used even if the trie doesn't have one.
|
||||||
func (t *Trie) Hash() common.Hash {
|
func (t *Trie) Hash() common.Hash {
|
||||||
return common.BytesToHash(t.hashRoot().(hashNode))
|
return common.BytesToHash(t.hashRoot())
|
||||||
}
|
}
|
||||||
|
|
||||||
// Commit collects all dirty nodes in the trie and replaces them with the
|
// Commit collects all dirty nodes in the trie and replaces them with the
|
||||||
|
|
@ -677,9 +677,9 @@ func (t *Trie) Commit(collectLeaf bool) (common.Hash, *trienode.NodeSet) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// hashRoot calculates the root hash of the given trie
|
// hashRoot calculates the root hash of the given trie
|
||||||
func (t *Trie) hashRoot() node {
|
func (t *Trie) hashRoot() []byte {
|
||||||
if t.root == nil {
|
if t.root == nil {
|
||||||
return hashNode(types.EmptyRootHash.Bytes())
|
return types.EmptyRootHash.Bytes()
|
||||||
}
|
}
|
||||||
// If the number of changes is below 100, we let one thread handle it
|
// If the number of changes is below 100, we let one thread handle it
|
||||||
h := newHasher(t.unhashed >= 100)
|
h := newHasher(t.unhashed >= 100)
|
||||||
|
|
|
||||||
|
|
@ -863,7 +863,6 @@ func (s *spongeDb) Flush() {
|
||||||
s.sponge.Write([]byte(key))
|
s.sponge.Write([]byte(key))
|
||||||
s.sponge.Write([]byte(s.values[key]))
|
s.sponge.Write([]byte(s.values[key]))
|
||||||
}
|
}
|
||||||
fmt.Println(len(s.keys))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// spongeBatch is a dummy batch which immediately writes to the underlying spongedb
|
// spongeBatch is a dummy batch which immediately writes to the underlying spongedb
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue