trie/bintrie: simplify StemNode to array-of-slices representation

Gballet asked on PR #34055 (comments 3100043116, 3100050542, and the
bit-check dedup at 3100114416 / 3100878310) to revert StemNode from the
packed-bytes representation to the straightforward array-of-slices.

Before: StemNode carried a bitmap, a concatenated valueData []byte, a
count, and a shared COW flag. Every read/write went through a bit-count
posInData lookup; every mutation through ensureWritable COW.

After: values [StemNodeWidth][]byte — 256 slots, nil == absent. No
bitmap lookup, no COW. Direct sn.values[suffix] access.

Supporting changes:
- Drop posInData, ensureWritable; rewrite getValue/hasValue/allValues/
  setValue as trivial slice access.
- Hash() iterates sn.values directly, matching master's shape.
- SerializeNode emits the bitmap + concatenated bytes on the wire from
  the array-of-slices at serialize time; wire format unchanged.
- decodeNode populates sn.values[i] slots by aliasing the serialized
  buffer (zero-copy).
- NodeStore.Copy deep-copies each slot.
- splitStemValuesInsert + the insertSingleInternal paths write directly
  to sn.values[i].

Trade-off: stems now carry 256 []byte headers (6144 B) instead of 1
concatenated slice (~32 B) + bitmap. Stem-pool scan cost returns to
parity with master (the existing valueData pointer already made the
pool non-noscan; rollback adds 255 more pointers per stem). The primary
arena win — pointer-free InternalNode pool — is preserved.
This commit is contained in:
CPerezz 2026-04-18 18:53:07 +02:00
parent 3885e539b7
commit e1859ea864
No known key found for this signature in database
GPG key ID: 62045F34B97177DD
4 changed files with 56 additions and 116 deletions

View file

@ -170,12 +170,18 @@ func (s *NodeStore) Copy() *NodeStore {
cp := *chunk
ns.stemChunks[i] = &cp
}
// Deep-copy each stem's value slots — they may alias serialized buffers,
// so we can't rely on the chunk-wise struct copy above.
for i := uint32(0); i < s.stemCount; i++ {
src := s.getStem(i)
dst := ns.getStem(i)
if len(src.valueData) > 0 {
dst.valueData = make([]byte, len(src.valueData))
copy(dst.valueData, src.valueData)
for j, v := range src.values {
if v == nil {
continue
}
cp := make([]byte, len(v))
copy(cp, v)
dst.values[j] = cp
}
}
ns.hashedChunks = make([]*[storeChunkSize]HashedNode, len(s.hashedChunks))

View file

@ -17,106 +17,40 @@
package bintrie
import (
"math/bits"
"github.com/ethereum/go-ethereum/common"
)
// StemNode holds up to 256 values sharing a 31-byte stem, packed via bitmap.
// StemNode holds up to 256 values sharing a 31-byte stem.
//
// Invariant: dirty=false implies mustRecompute=false. Every mutation that
// invalidates the cached hash MUST also mark the blob for re-flush.
type StemNode struct {
Stem [StemSize]byte
bitmap [StemBitmapSize]byte
valueData []byte
count uint16
depth uint8
shared bool // true if valueData is shared with serialized input
Stem [StemSize]byte
values [StemNodeWidth][]byte // nil == slot absent
depth uint8
mustRecompute bool // hash is stale (cleared by Hash)
dirty bool // on-disk blob is stale (cleared by CollectNodes)
hash common.Hash // cached hash when mustRecompute == false
}
// posInData returns the offset within valueData, or -1 if absent.
func (sn *StemNode) posInData(suffix byte) int {
idx := int(suffix)
if sn.bitmap[idx/8]>>(7-(idx%8))&1 == 0 {
return -1
}
// Count the bits set before this position to determine the offset
pos := 0
byteIdx := idx / 8
for i := 0; i < byteIdx; i++ {
pos += bits.OnesCount8(sn.bitmap[i])
}
// Count bits in the partial byte
mask := byte(0xFF) << (8 - (idx % 8))
pos += bits.OnesCount8(sn.bitmap[byteIdx] & mask)
return pos
}
func (sn *StemNode) getValue(suffix byte) []byte {
pos := sn.posInData(suffix)
if pos < 0 {
return nil
}
start := pos * HashSize
return sn.valueData[start : start+HashSize]
return sn.values[suffix]
}
func (sn *StemNode) hasValue(suffix byte) bool {
idx := int(suffix)
return sn.bitmap[idx/8]>>(7-(idx%8))&1 == 1
return sn.values[suffix] != nil
}
// allValues returns all 256 values (nil for absent positions).
// allValues returns the underlying slot array as a slice. nil entries mean
// absent. Callers must treat it as read-only.
func (sn *StemNode) allValues() [][]byte {
values := make([][]byte, StemNodeWidth)
dataIdx := 0
for i := range StemNodeWidth {
if sn.bitmap[i/8]>>(7-(i%8))&1 == 1 {
values[i] = sn.valueData[dataIdx*HashSize : (dataIdx+1)*HashSize]
dataIdx++
}
}
return values
}
// ensureWritable copies valueData if shared (copy-on-write).
func (sn *StemNode) ensureWritable() {
if sn.shared || cap(sn.valueData)-len(sn.valueData) < HashSize {
newData := make([]byte, len(sn.valueData), len(sn.valueData)+HashSize*4)
copy(newData, sn.valueData)
sn.valueData = newData
sn.shared = false
}
return sn.values[:]
}
func (sn *StemNode) setValue(suffix byte, value []byte) {
sn.ensureWritable()
idx := int(suffix)
pos := sn.posInData(suffix)
if pos >= 0 {
copy(sn.valueData[pos*HashSize:], value[:HashSize])
return
}
sn.bitmap[idx/8] |= 1 << (7 - (idx % 8))
sn.count++
insertPos := 0
byteIdx := idx / 8
for i := 0; i < byteIdx; i++ {
insertPos += bits.OnesCount8(sn.bitmap[i])
}
mask := byte(0xFF) << (8 - (idx % 8))
insertPos += bits.OnesCount8(sn.bitmap[byteIdx] & mask)
insertOffset := insertPos * HashSize
sn.valueData = append(sn.valueData, make([]byte, HashSize)...)
copy(sn.valueData[insertOffset+HashSize:], sn.valueData[insertOffset:len(sn.valueData)-HashSize])
copy(sn.valueData[insertOffset:], value[:HashSize])
sn.values[suffix] = value
}
func (sn *StemNode) Hash() common.Hash {
@ -128,28 +62,21 @@ func (sn *StemNode) Hash() common.Hash {
h := newSha256()
defer returnSha256(h)
// Hash each present value
dataIdx := 0
for i := range StemNodeWidth {
if sn.bitmap[i/8]>>(7-(i%8))&1 == 1 {
v := sn.valueData[dataIdx*HashSize : (dataIdx+1)*HashSize]
for i, v := range sn.values {
if v != nil {
h.Reset()
h.Write(v)
h.Sum(data[i][:0])
dataIdx++
}
}
h.Reset()
for level := 1; level <= 8; level++ {
for i := range StemNodeWidth / (1 << level) {
h.Reset()
if data[i*2] == (common.Hash{}) && data[i*2+1] == (common.Hash{}) {
data[i] = common.Hash{}
continue
}
h.Reset()
h.Write(data[i*2][:])
h.Write(data[i*2+1][:])
data[i] = common.Hash(h.Sum(nil))

View file

@ -123,12 +123,26 @@ func (s *NodeStore) serializeNode(ref nodeRef) []byte {
case kindStem:
sn := s.getStem(ref.Index())
serializedLen := NodeTypeBytes + StemSize + StemBitmapSize + len(sn.valueData)
// Count present slots to size the blob.
var count int
for _, v := range sn.values {
if v != nil {
count++
}
}
serializedLen := NodeTypeBytes + StemSize + StemBitmapSize + count*HashSize
serialized := make([]byte, serializedLen)
serialized[0] = nodeTypeStem
copy(serialized[NodeTypeBytes:NodeTypeBytes+StemSize], sn.Stem[:])
copy(serialized[NodeTypeBytes+StemSize:NodeTypeBytes+StemSize+StemBitmapSize], sn.bitmap[:])
copy(serialized[NodeTypeBytes+StemSize+StemBitmapSize:], sn.valueData)
bitmap := serialized[NodeTypeBytes+StemSize : NodeTypeBytes+StemSize+StemBitmapSize]
offset := NodeTypeBytes + StemSize + StemBitmapSize
for i, v := range sn.values {
if v != nil {
bitmap[i/8] |= 1 << (7 - (i % 8))
copy(serialized[offset:offset+HashSize], v)
offset += HashSize
}
}
return serialized
default:
@ -184,27 +198,25 @@ func (s *NodeStore) decodeNode(serialized []byte, depth int, hn common.Hash, mus
return ref, nil
case nodeTypeStem:
if len(serialized) < 64 {
if len(serialized) < NodeTypeBytes+StemSize+StemBitmapSize {
return emptyRef, errInvalidSerializedLength
}
stemIdx := s.allocStem()
sn := s.getStem(stemIdx)
copy(sn.Stem[:], serialized[NodeTypeBytes:NodeTypeBytes+StemSize])
copy(sn.bitmap[:], serialized[NodeTypeBytes+StemSize:NodeTypeBytes+StemSize+StemBitmapSize])
var count uint16
for i := range StemBitmapSize {
count += uint16(bits.OnesCount8(sn.bitmap[i]))
bitmap := serialized[NodeTypeBytes+StemSize : NodeTypeBytes+StemSize+StemBitmapSize]
offset := NodeTypeBytes + StemSize + StemBitmapSize
for i := range StemNodeWidth {
if bitmap[i/8]>>(7-(i%8))&1 != 1 {
continue
}
if len(serialized) < offset+HashSize {
return emptyRef, errInvalidSerializedLength
}
// Zero-copy: each slot aliases the serialized input buffer.
sn.values[i] = serialized[offset : offset+HashSize]
offset += HashSize
}
sn.count = count
dataStart := NodeTypeBytes + StemSize + StemBitmapSize
dataEnd := dataStart + int(count)*HashSize
if len(serialized) < dataEnd {
return emptyRef, errInvalidSerializedLength
}
// Zero-copy: aliases the serialized buffer; ensureWritable() COWs before mutation.
sn.valueData = serialized[dataStart:dataEnd]
sn.shared = true
sn.depth = uint8(depth)
sn.hash = hn
sn.mustRecompute = mustRecompute
@ -279,13 +291,10 @@ func (s *NodeStore) toDot(ref nodeRef, parent, path string) string {
me := fmt.Sprintf("stem%s", path)
ret := fmt.Sprintf("%s [label=\"stem=%x c=%x\"]\n", me, sn.Stem, sn.Hash())
ret = fmt.Sprintf("%s %s -> %s\n", ret, parent, me)
idx := 0
for i := range StemNodeWidth {
if sn.bitmap[i/8]>>(7-i%8)&1 != 1 {
for i, v := range sn.values {
if v == nil {
continue
}
v := sn.valueData[idx*HashSize : (idx+1)*HashSize]
idx++
ret += fmt.Sprintf("%s%x [label=\"%x\"]\n", me, i, v)
ret += fmt.Sprintf("%s -> %s%x\n", me, me, i)
}

View file

@ -479,9 +479,7 @@ func (s *NodeStore) insertValuesAtStem(ref nodeRef, stem []byte, values [][]byte
sn.dirty = true
for i, v := range values {
if v != nil {
sn.count++
sn.bitmap[i/8] |= 1 << (7 - (i % 8))
sn.valueData = append(sn.valueData, v[:HashSize]...)
sn.values[i] = v
}
}
return makeRef(kindStem, stemIdx), nil