go-ethereum/trie/bintrie/store_commit.go
CPerezz e1859ea864
trie/bintrie: simplify StemNode to array-of-slices representation
Gballet asked on PR #34055 (comments 3100043116, 3100050542, and the
bit-check dedup at 3100114416 / 3100878310) to revert StemNode from the
packed-bytes representation to the straightforward array-of-slices.

Before: StemNode carried a bitmap, a concatenated valueData []byte, a
count, and a shared COW flag. Every read/write went through a bit-count
posInData lookup; every mutation through ensureWritable COW.

After: values [StemNodeWidth][]byte — 256 slots, nil == absent. No
bitmap lookup, no COW. Direct sn.values[suffix] access.

Supporting changes:
- Drop posInData, ensureWritable; rewrite getValue/hasValue/allValues/
  setValue as trivial slice access.
- Hash() iterates sn.values directly, matching master's shape.
- SerializeNode emits the bitmap + concatenated bytes on the wire from
  the array-of-slices at serialize time; wire format unchanged.
- decodeNode populates sn.values[i] slots by aliasing the serialized
  buffer (zero-copy).
- NodeStore.Copy deep-copies each slot.
- splitStemValuesInsert + the insertSingleInternal paths write directly
  to sn.values[i].

Trade-off: stems now carry 256 []byte headers (6144 B) instead of 1
concatenated slice (~32 B) + bitmap. Stem-pool scan cost returns to
parity with master (the existing valueData pointer already made the
pool non-noscan; rollback adds 255 more pointers per stem). The primary
arena win — pointer-free InternalNode pool — is preserved.
2026-04-18 18:53:07 +02:00

311 lines
9 KiB
Go

// Copyright 2026 go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package bintrie
import (
"crypto/sha256"
"errors"
"fmt"
"math/bits"
"runtime"
"sync"
"github.com/ethereum/go-ethereum/common"
)
type NodeFlushFn func(path []byte, hash common.Hash, serialized []byte)
func (s *NodeStore) Hash() common.Hash {
return s.computeHash(s.root)
}
func (s *NodeStore) computeHash(ref nodeRef) common.Hash {
switch ref.Kind() {
case kindInternal:
return s.hashInternal(ref.Index())
case kindStem:
return s.getStem(ref.Index()).Hash()
case kindHashed:
return s.getHashed(ref.Index()).Hash()
case kindEmpty:
return common.Hash{}
default:
return common.Hash{}
}
}
// parallelDepth returns the tree depth below which hashInternal spawns
// goroutines for shallow-depth parallelism.
func parallelDepth() int {
return min(bits.Len(uint(runtime.NumCPU())), 8)
}
// hashInternal hashes an InternalNode and caches the result.
//
// At shallow depths (< parallelDepth()) the left subtree is hashed in a
// goroutine while the right subtree is hashed inline, then the two digests
// are combined. Below that threshold the goroutine spawn cost outweighs the
// hashing work, so deeper nodes hash both children sequentially via the
// pooled hasher.
func (s *NodeStore) hashInternal(idx uint32) common.Hash {
node := s.getInternal(idx)
if !node.mustRecompute {
return node.hash
}
if int(node.depth) < parallelDepth() {
var input [64]byte
var lh common.Hash
var wg sync.WaitGroup
if !node.left.IsEmpty() {
wg.Add(1)
go func() {
lh = s.computeHash(node.left)
wg.Done()
}()
}
if !node.right.IsEmpty() {
rh := s.computeHash(node.right)
copy(input[32:], rh[:])
}
wg.Wait()
copy(input[:32], lh[:])
node.hash = sha256.Sum256(input[:])
node.mustRecompute = false
return node.hash
}
h := newSha256()
defer returnSha256(h)
if !node.left.IsEmpty() {
lh := s.computeHash(node.left)
h.Write(lh[:])
} else {
h.Write(make([]byte, HashSize))
}
if !node.right.IsEmpty() {
rh := s.computeHash(node.right)
h.Write(rh[:])
} else {
h.Write(make([]byte, HashSize))
}
node.hash = common.BytesToHash(h.Sum(nil))
node.mustRecompute = false
return node.hash
}
// SerializeNode serializes a node into the flat on-disk format.
func (s *NodeStore) serializeNode(ref nodeRef) []byte {
switch ref.Kind() {
case kindInternal:
node := s.getInternal(ref.Index())
var serialized [NodeTypeBytes + HashSize + HashSize]byte
serialized[0] = nodeTypeInternal
lh := s.computeHash(node.left)
rh := s.computeHash(node.right)
copy(serialized[NodeTypeBytes:NodeTypeBytes+HashSize], lh[:])
copy(serialized[NodeTypeBytes+HashSize:], rh[:])
return serialized[:]
case kindStem:
sn := s.getStem(ref.Index())
// Count present slots to size the blob.
var count int
for _, v := range sn.values {
if v != nil {
count++
}
}
serializedLen := NodeTypeBytes + StemSize + StemBitmapSize + count*HashSize
serialized := make([]byte, serializedLen)
serialized[0] = nodeTypeStem
copy(serialized[NodeTypeBytes:NodeTypeBytes+StemSize], sn.Stem[:])
bitmap := serialized[NodeTypeBytes+StemSize : NodeTypeBytes+StemSize+StemBitmapSize]
offset := NodeTypeBytes + StemSize + StemBitmapSize
for i, v := range sn.values {
if v != nil {
bitmap[i/8] |= 1 << (7 - (i % 8))
copy(serialized[offset:offset+HashSize], v)
offset += HashSize
}
}
return serialized
default:
panic(fmt.Sprintf("SerializeNode: unexpected node kind %d", ref.Kind()))
}
}
var errInvalidSerializedLength = errors.New("invalid serialized node length")
// DeserializeNode deserializes a node from bytes, recomputing its hash. The
// returned node is marked dirty (provenance unknown, safe re-flush default).
func (s *NodeStore) deserializeNode(serialized []byte, depth int) (nodeRef, error) {
return s.decodeNode(serialized, depth, common.Hash{}, true, true)
}
// DeserializeNodeWithHash deserializes a node whose hash is already known and
// whose blob is already on disk (mustRecompute=false, dirty=false).
func (s *NodeStore) deserializeNodeWithHash(serialized []byte, depth int, hn common.Hash) (nodeRef, error) {
return s.decodeNode(serialized, depth, hn, false, false)
}
func (s *NodeStore) decodeNode(serialized []byte, depth int, hn common.Hash, mustRecompute, dirty bool) (nodeRef, error) {
if len(serialized) == 0 {
return emptyRef, nil
}
switch serialized[0] {
case nodeTypeInternal:
if len(serialized) != NodeTypeBytes+2*HashSize {
return emptyRef, errInvalidSerializedLength
}
var leftHash, rightHash common.Hash
copy(leftHash[:], serialized[NodeTypeBytes:NodeTypeBytes+HashSize])
copy(rightHash[:], serialized[NodeTypeBytes+HashSize:])
var leftRef, rightRef nodeRef
if leftHash != (common.Hash{}) {
leftRef = s.newHashedRef(leftHash)
}
if rightHash != (common.Hash{}) {
rightRef = s.newHashedRef(rightHash)
}
ref := s.newInternalRef(depth)
node := s.getInternal(ref.Index())
node.left = leftRef
node.right = rightRef
if !mustRecompute {
node.hash = hn
node.mustRecompute = false
}
node.dirty = dirty
return ref, nil
case nodeTypeStem:
if len(serialized) < NodeTypeBytes+StemSize+StemBitmapSize {
return emptyRef, errInvalidSerializedLength
}
stemIdx := s.allocStem()
sn := s.getStem(stemIdx)
copy(sn.Stem[:], serialized[NodeTypeBytes:NodeTypeBytes+StemSize])
bitmap := serialized[NodeTypeBytes+StemSize : NodeTypeBytes+StemSize+StemBitmapSize]
offset := NodeTypeBytes + StemSize + StemBitmapSize
for i := range StemNodeWidth {
if bitmap[i/8]>>(7-(i%8))&1 != 1 {
continue
}
if len(serialized) < offset+HashSize {
return emptyRef, errInvalidSerializedLength
}
// Zero-copy: each slot aliases the serialized input buffer.
sn.values[i] = serialized[offset : offset+HashSize]
offset += HashSize
}
sn.depth = uint8(depth)
sn.hash = hn
sn.mustRecompute = mustRecompute
sn.dirty = dirty
return makeRef(kindStem, stemIdx), nil
default:
return emptyRef, errors.New("invalid node type")
}
}
// CollectNodes flushes every node that needs flushing via flushfn in post-order.
// Invariant: any ancestor of a node that needs flushing is itself marked, so a
// clean root means the whole subtree is clean.
func (s *NodeStore) collectNodes(ref nodeRef, path []byte, flushfn NodeFlushFn) error {
switch ref.Kind() {
case kindEmpty:
return nil
case kindInternal:
node := s.getInternal(ref.Index())
if !node.dirty {
return nil
}
leftPath := make([]byte, len(path)+1)
copy(leftPath, path)
leftPath[len(path)] = 0
if err := s.collectNodes(node.left, leftPath, flushfn); err != nil {
return err
}
rightPath := make([]byte, len(path)+1)
copy(rightPath, path)
rightPath[len(path)] = 1
if err := s.collectNodes(node.right, rightPath, flushfn); err != nil {
return err
}
flushfn(path, s.computeHash(ref), s.serializeNode(ref))
node.dirty = false
return nil
case kindStem:
sn := s.getStem(ref.Index())
if !sn.dirty {
return nil
}
flushfn(path, s.computeHash(ref), s.serializeNode(ref))
sn.dirty = false
return nil
case kindHashed:
return nil // Already committed
default:
return fmt.Errorf("CollectNodes: unexpected kind %d", ref.Kind())
}
}
func (s *NodeStore) toDot(ref nodeRef, parent, path string) string {
switch ref.Kind() {
case kindInternal:
node := s.getInternal(ref.Index())
me := fmt.Sprintf("internal%s", path)
ret := fmt.Sprintf("%s [label=\"I: %x\"]\n", me, s.computeHash(ref))
if len(parent) > 0 {
ret = fmt.Sprintf("%s %s -> %s\n", ret, parent, me)
}
if !node.left.IsEmpty() {
ret += s.toDot(node.left, me, fmt.Sprintf("%s%02x", path, 0))
}
if !node.right.IsEmpty() {
ret += s.toDot(node.right, me, fmt.Sprintf("%s%02x", path, 1))
}
return ret
case kindStem:
sn := s.getStem(ref.Index())
me := fmt.Sprintf("stem%s", path)
ret := fmt.Sprintf("%s [label=\"stem=%x c=%x\"]\n", me, sn.Stem, sn.Hash())
ret = fmt.Sprintf("%s %s -> %s\n", ret, parent, me)
for i, v := range sn.values {
if v == nil {
continue
}
ret += fmt.Sprintf("%s%x [label=\"%x\"]\n", me, i, v)
ret += fmt.Sprintf("%s -> %s%x\n", me, me, i)
}
return ret
case kindHashed:
hn := s.getHashed(ref.Index())
me := fmt.Sprintf("hash%s", path)
ret := fmt.Sprintf("%s [label=\"%x\"]\n", me, hn.Hash())
ret = fmt.Sprintf("%s %s -> %s\n", ret, parent, me)
return ret
default:
return ""
}
}