trie/bintrie: fix hashInternal at group boundaries to match read-back hash

For an InternalNode at a group-boundary depth, hashInternal previously
computed a pure SHA256(left, right) recursion over the natural-depth
in-memory tree built by UpdateStem. However, serializeSubtree extends stems
to the group's bottom layer via key-bit extension, so the on-disk blob
encodes an extended-depth structure. When a fresh reader deserializes
that blob, hashInternal walks the extended-depth in-memory tree and
produces a different value.

The result was that for any subtree with multiple stems sharing a
prefix shorter than groupDepth, the parent's stored child-hash (computed
from the natural-depth in-memory tree at commit time) did not equal the
child blob's read-back hash. Geth's own write-read cycle was internally
inconsistent: state-actor's groundtruth test, which feeds the same
stems through state-actor's streaming builder and geth's UpdateStem +
Commit and diffs the resulting on-disk node sets, fails at n=4 with a
mismatched slot hash in the root group blob.

At a group boundary, recompute the hash via serializeSubtree +
groupedRecursiveHash so that the parent stores the same value the
reader will compute when it deserializes the child blob.

The fix is gated on groupDepth > 0, so nodeStore tests that construct
the store directly without going through NewBinaryTrie retain the
existing pure-SHA256 recursion semantics.

Verification:
- All existing trie/bintrie tests pass unchanged.
- state-actor/generator's TestStreamingMatchesGethCommit (which compares
  state-actor's streaming builder output to geth's Commit output
  byte-for-byte at n=2,4,8,32,128) now passes.
This commit is contained in:
weiihann 2026-05-13 09:39:19 +08:00
parent 012bec0eb1
commit a1eaa21f24
3 changed files with 68 additions and 1 deletions

View file

@ -41,6 +41,16 @@ type nodeStore struct {
// stem-split keeps the old stem at a deeper position), so they don't
// have free lists.
freeHashed []uint32
// groupDepth, when > 0, makes hashInternal compute the same hash that
// would be produced by serializing the node to a group blob and
// recursively hashing the blob's bottom-layer leaves. This matches the
// hash a fresh reader would compute via deserializeSubtree, keeping the
// parent-stored child hash byte-equal to the child's read-back hash.
// When 0, hashInternal falls back to the natural-depth SHA256 recursion
// used by tests that construct nodeStore directly without going through
// NewBinaryTrie.
groupDepth int
}
func newNodeStore() *nodeStore {

View file

@ -59,12 +59,30 @@ var parallelHashDepth = min(bits.Len(uint(runtime.NumCPU())), 8)
// goroutine while the right subtree is hashed inline, then the two digests
// are combined. Below that threshold the goroutine spawn cost outweighs the
// hashing work, so deeper nodes hash both children sequentially.
//
// At a group boundary (depth % groupDepth == 0, with groupDepth > 0) the
// hash is computed from the group's bottom-layer slot hashes via the same
// serialize-then-recursive-hash that a fresh reader applies after reading
// the node's blob from disk. This guarantees the parent's stored child
// hash equals the child's read-back hash byte-for-byte, regardless of
// whether the in-memory subtree placed its stems at natural depth (via
// UpdateStem split) or extended depth (via deserializeSubtree).
func (s *nodeStore) hashInternal(idx uint32) common.Hash {
node := s.getInternal(idx)
if !node.mustRecompute {
return node.hash
}
if s.groupDepth > 0 && int(node.depth)%s.groupDepth == 0 {
bitmapSize := bitmapSizeForDepth(s.groupDepth)
bitmap := make([]byte, bitmapSize)
var hashes []common.Hash
s.serializeSubtree(makeRef(kindInternal, idx), s.groupDepth, 0, int(node.depth), bitmap, &hashes)
node.hash = groupedRecursiveHash(s.groupDepth, bitmap, hashes)
node.mustRecompute = false
return node.hash
}
if int(node.depth) < parallelHashDepth {
var input [64]byte
var lh common.Hash
@ -107,6 +125,43 @@ func (s *nodeStore) hashInternal(idx uint32) common.Hash {
return node.hash
}
// groupedRecursiveHash folds the bottom-layer slot hashes of a group blob
// into a single root hash by repeated pairwise SHA256.
//
// bitmap carries one bit per bottom-layer slot (2^groupDepth slots in total),
// most-significant bit first within each byte. hashes lists the hashes of the
// occupied slots in ascending slot order; unoccupied slots count as the zero
// hash. A pair whose halves are both zero folds to the zero hash instead of
// SHA256 over 64 zero bytes; every other pair folds to SHA256(left || right).
//
// The (bitmap, hashes) pair is expected to be the one serializeSubtree
// produces, and the result is intended to equal what hashInternal yields on a
// tree rebuilt from that pair by deserializeSubtree, so that a parent's stored
// child hash matches the child's read-back hash regardless of where stems sit
// in the in-memory tree.
//
// The function indexes bitmap and hashes without bounds checks of its own: a
// bitmap shorter than the slot count, or more set bits than entries in
// hashes, panics.
func groupedRecursiveHash(groupDepth int, bitmap []byte, hashes []common.Hash) common.Hash {
	slotCount := 1 << groupDepth

	// Expand the sparse (bitmap, hashes) encoding into a dense bottom layer;
	// slots whose bit is clear keep the zero hash.
	layer := make([]common.Hash, slotCount)
	used := 0
	for slot := range layer {
		mask := byte(0x80) >> (slot % 8)
		if bitmap[slot/8]&mask == 0 {
			continue
		}
		layer[slot] = hashes[used]
		used++
	}

	// Collapse the layer pairwise until only the root remains.
	var (
		empty    common.Hash
		preimage [64]byte
	)
	for len(layer) > 1 {
		parents := make([]common.Hash, len(layer)/2)
		for p := range parents {
			left, right := layer[2*p], layer[2*p+1]
			if left != empty || right != empty {
				// preimage is fully overwritten on every use, so sharing
				// one buffer across iterations is safe.
				copy(preimage[:32], left[:])
				copy(preimage[32:], right[:])
				parents[p] = sha256.Sum256(preimage[:])
			}
			// An all-empty pair leaves parents[p] at the zero hash.
		}
		layer = parents
	}
	return layer[0]
}
// serializeSubtree recursively collects child hashes from a subtree of InternalNodes.
// It traverses up to `remainingDepth` levels, storing hashes of bottom-layer children.
// position tracks the current index (0 to 2^groupDepth - 1) for bitmap placement.

View file

@ -133,8 +133,10 @@ func NewBinaryTrie(root common.Hash, db database.NodeDatabase, groupDepth int) (
if err != nil {
return nil, err
}
store := newNodeStore()
store.groupDepth = groupDepth
t := &BinaryTrie{
store: newNodeStore(),
store: store,
reader: reader,
tracer: trie.NewPrevalueTracer(),
groupDepth: groupDepth,