mirror of
https://github.com/ethereum/go-ethereum.git
synced 2026-05-24 08:49:29 +00:00
When splitStemValuesInsert inserts a new stem that shares a prefix with an existing stem, it increments the existing stem's depth and inserts a new internal node above it. The existing stem's on-disk path is derived from its depth via collectChildGroups + extendPathToGroupLeaf, so promoting its depth means it should be flushed at a new path. Previously, only the new stem (created in the divergence branch) was marked dirty. The promoted existing stem retained whatever dirty value it had — false if it was just deserialized from disk via a HashedNode resolve. collectNodes would then skip flushing the existing stem at its new path, while the new ancestor internal blob (also dirty) overwrites the existing stem's old blob at the prior path. The stem's data is left with no on-disk home, breaking subsequent reads with "missing trie node". The bug surfaces in the integration-test harness (state-actor builds a DB with single-stem-per-slot at depth 8, geth then mutates by adding a new stem that shares ≥8 prefix bits with the existing stem). After mutation, geth's `getValuesAtStem` resolves a HashedNode whose blob should be at the extended-depth path but isn't on disk. Mark `existing.dirty = true` when promoting the depth so collectNodes re-flushes the stem at its new path. Verification: the 100MB integration-test harness (which previously failed at block 9-10 with "missing trie node bdaf89... (path c96010)") now runs cleanly through 200+ blocks of ERC20 deploys and bloat transactions without any missing-trie-node errors.
351 lines
11 KiB
Go
351 lines
11 KiB
Go
// Copyright 2026 go-ethereum Authors
|
|
// This file is part of the go-ethereum library.
|
|
//
|
|
// The go-ethereum library is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Lesser General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// The go-ethereum library is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Lesser General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
package bintrie
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
|
|
"github.com/ethereum/go-ethereum/common"
|
|
)
|
|
|
|
// nodeResolverFn resolves a hashed node from the database.
|
|
type nodeResolverFn func([]byte, common.Hash) ([]byte, error)
|
|
|
|
// GetValue returns the value at (stem, suffix) or nil if absent. Thin
|
|
// wrapper over GetValuesAtStem — the underlying StemNode returns its
|
|
// 256-slot array as a slice header (no allocation), so the per-call cost
|
|
// is the tree walk plus one index.
|
|
func (s *nodeStore) GetValue(stem []byte, suffix byte, resolver nodeResolverFn) ([]byte, error) {
|
|
values, err := s.GetValuesAtStem(stem, resolver)
|
|
if err != nil || values == nil {
|
|
return nil, err
|
|
}
|
|
return values[suffix], nil
|
|
}
|
|
|
|
// GetValuesAtStem returns the 256 value slots at stem, or nil if the stem
|
|
// is not in the trie. The returned slice is a view over the in-place
|
|
// StemNode values array (no allocation) and must be treated read-only.
|
|
func (s *nodeStore) GetValuesAtStem(stem []byte, resolver nodeResolverFn) ([][]byte, error) {
|
|
cur := s.root
|
|
var parentIdx uint32
|
|
var parentIsLeft bool
|
|
|
|
for {
|
|
switch cur.Kind() {
|
|
case kindInternal:
|
|
node := s.getInternal(cur.Index())
|
|
if node.depth >= 31*8 {
|
|
return nil, errors.New("node too deep")
|
|
}
|
|
bit := stem[node.depth/8] >> (7 - (node.depth % 8)) & 1
|
|
parentIdx = cur.Index()
|
|
if bit == 0 {
|
|
parentIsLeft = true
|
|
cur = node.left
|
|
} else {
|
|
parentIsLeft = false
|
|
cur = node.right
|
|
}
|
|
|
|
case kindStem:
|
|
sn := s.getStem(cur.Index())
|
|
if sn.Stem != [StemSize]byte(stem[:StemSize]) {
|
|
return nil, nil
|
|
}
|
|
return sn.allValues(), nil
|
|
|
|
case kindHashed:
|
|
// HashedNode at root is impossible: NewBinaryTrie resolves the
|
|
// root eagerly before any query. Any HashedNode we encounter here
|
|
// is necessarily a child of a previously-visited internal node.
|
|
if resolver == nil {
|
|
return nil, errors.New("getValuesAtStem: cannot resolve hashed node without resolver")
|
|
}
|
|
hn := s.getHashed(cur.Index())
|
|
parentNode := s.getInternal(parentIdx)
|
|
path, err := keyToPath(int(parentNode.depth), stem)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("getValuesAtStem path error: %w", err)
|
|
}
|
|
data, err := resolver(path, hn.Hash())
|
|
if err != nil {
|
|
return nil, fmt.Errorf("getValuesAtStem resolve error: %w", err)
|
|
}
|
|
resolved, err := s.deserializeNodeWithHash(data, int(parentNode.depth)+1, hn.Hash())
|
|
if err != nil {
|
|
return nil, fmt.Errorf("getValuesAtStem deserialization error: %w", err)
|
|
}
|
|
s.freeHashedNode(cur.Index())
|
|
if parentIsLeft {
|
|
parentNode.left = resolved
|
|
} else {
|
|
parentNode.right = resolved
|
|
}
|
|
cur = resolved
|
|
|
|
case kindEmpty:
|
|
var values [StemNodeWidth][]byte
|
|
return values[:], nil
|
|
|
|
default:
|
|
return nil, fmt.Errorf("getValuesAtStem: unexpected node kind %d", cur.Kind())
|
|
}
|
|
}
|
|
}
|
|
|
|
// InsertSingle writes a single value slot at (stem, suffix). Thin wrapper
|
|
// over InsertValuesAtStem — builds a stack-allocated 256-slot array with
|
|
// only the target slot set and delegates. Matches the original design
|
|
// gballet referenced (comment 3101751325): one primary insert path; the
|
|
// single-slot variant dispatches through it so the split / resolve logic
|
|
// lives in one place.
|
|
func (s *nodeStore) InsertSingle(stem []byte, suffix byte, value []byte, resolver nodeResolverFn) error {
|
|
if len(value) != HashSize {
|
|
return errors.New("invalid insertion: value length")
|
|
}
|
|
var values [StemNodeWidth][]byte
|
|
values[suffix] = value
|
|
return s.InsertValuesAtStem(stem, values[:], resolver)
|
|
}
|
|
|
|
// InsertValuesAtStem writes the supplied value slots at stem. values may be
|
|
// sparse (nil entries are ignored). The recursive implementation dispatches
|
|
// through the same body, so a single code path handles internal descent,
|
|
// HashedNode resolution, stem merge, and stem split.
|
|
func (s *nodeStore) InsertValuesAtStem(stem []byte, values [][]byte, resolver nodeResolverFn) error {
|
|
var err error
|
|
s.root, err = s.insertValuesAtStem(s.root, stem, values, resolver, 0)
|
|
return err
|
|
}
|
|
|
|
func (s *nodeStore) insertValuesAtStem(ref nodeRef, stem []byte, values [][]byte, resolver nodeResolverFn, depth int) (nodeRef, error) {
|
|
switch ref.Kind() {
|
|
case kindInternal:
|
|
node := s.getInternal(ref.Index())
|
|
bit := stem[node.depth/8] >> (7 - (node.depth % 8)) & 1
|
|
if bit == 0 {
|
|
if node.left.Kind() == kindHashed {
|
|
if resolver == nil {
|
|
return ref, errors.New("insertValuesAtStem: cannot resolve hashed node without resolver")
|
|
}
|
|
hn := s.getHashed(node.left.Index())
|
|
path, err := keyToPath(int(node.depth), stem)
|
|
if err != nil {
|
|
return ref, fmt.Errorf("InsertValuesAtStem path error: %w", err)
|
|
}
|
|
data, err := resolver(path, hn.Hash())
|
|
if err != nil {
|
|
return ref, fmt.Errorf("InsertValuesAtStem resolve error: %w", err)
|
|
}
|
|
resolved, err := s.deserializeNodeWithHash(data, int(node.depth)+1, hn.Hash())
|
|
if err != nil {
|
|
return ref, fmt.Errorf("InsertValuesAtStem deserialization error: %w", err)
|
|
}
|
|
s.freeHashedNode(node.left.Index())
|
|
node.left = resolved
|
|
}
|
|
newChild, err := s.insertValuesAtStem(node.left, stem, values, resolver, depth+1)
|
|
if err != nil {
|
|
return ref, err
|
|
}
|
|
node.left = newChild
|
|
} else {
|
|
if node.right.Kind() == kindHashed {
|
|
if resolver == nil {
|
|
return ref, errors.New("insertValuesAtStem: cannot resolve hashed node without resolver")
|
|
}
|
|
hn := s.getHashed(node.right.Index())
|
|
path, err := keyToPath(int(node.depth), stem)
|
|
if err != nil {
|
|
return ref, fmt.Errorf("InsertValuesAtStem path error: %w", err)
|
|
}
|
|
data, err := resolver(path, hn.Hash())
|
|
if err != nil {
|
|
return ref, fmt.Errorf("InsertValuesAtStem resolve error: %w", err)
|
|
}
|
|
resolved, err := s.deserializeNodeWithHash(data, int(node.depth)+1, hn.Hash())
|
|
if err != nil {
|
|
return ref, fmt.Errorf("InsertValuesAtStem deserialization error: %w", err)
|
|
}
|
|
s.freeHashedNode(node.right.Index())
|
|
node.right = resolved
|
|
}
|
|
newChild, err := s.insertValuesAtStem(node.right, stem, values, resolver, depth+1)
|
|
if err != nil {
|
|
return ref, err
|
|
}
|
|
node.right = newChild
|
|
}
|
|
node.mustRecompute = true
|
|
node.dirty = true
|
|
return ref, nil
|
|
|
|
case kindStem:
|
|
sn := s.getStem(ref.Index())
|
|
if sn.Stem == [StemSize]byte(stem[:StemSize]) {
|
|
// Same stem — merge values (setValue marks dirty+mustRecompute)
|
|
for i, v := range values {
|
|
if v != nil {
|
|
sn.setValue(byte(i), v)
|
|
}
|
|
}
|
|
return ref, nil
|
|
}
|
|
// Different stem — split
|
|
return s.splitStemValuesInsert(ref, stem, values, resolver, depth)
|
|
|
|
case kindHashed:
|
|
hn := s.getHashed(ref.Index())
|
|
path, err := keyToPath(depth, stem)
|
|
if err != nil {
|
|
return ref, fmt.Errorf("InsertValuesAtStem path error: %w", err)
|
|
}
|
|
if resolver == nil {
|
|
return ref, errors.New("InsertValuesAtStem: resolver is nil")
|
|
}
|
|
data, err := resolver(path, hn.Hash())
|
|
if err != nil {
|
|
return ref, fmt.Errorf("InsertValuesAtStem resolve error: %w", err)
|
|
}
|
|
resolved, err := s.deserializeNodeWithHash(data, depth, hn.Hash())
|
|
if err != nil {
|
|
return ref, fmt.Errorf("InsertValuesAtStem deserialization error: %w", err)
|
|
}
|
|
s.freeHashedNode(ref.Index())
|
|
return s.insertValuesAtStem(resolved, stem, values, resolver, depth)
|
|
|
|
case kindEmpty:
|
|
// Create new StemNode. Flag flips before the value loop so an
|
|
// all-nil values input still marks the newly-created stem dirty.
|
|
stemIdx := s.allocStem()
|
|
sn := s.getStem(stemIdx)
|
|
copy(sn.Stem[:], stem[:StemSize])
|
|
sn.depth = uint8(depth)
|
|
sn.mustRecompute = true
|
|
sn.dirty = true
|
|
for i, v := range values {
|
|
if v != nil {
|
|
sn.setValue(byte(i), v)
|
|
}
|
|
}
|
|
return makeRef(kindStem, stemIdx), nil
|
|
|
|
default:
|
|
return ref, fmt.Errorf("insertValuesAtStem: unexpected kind %d", ref.Kind())
|
|
}
|
|
}
|
|
|
|
// splitStemValuesInsert splits a StemNode when the new stem diverges.
|
|
func (s *nodeStore) splitStemValuesInsert(existingRef nodeRef, newStem []byte, values [][]byte, resolver nodeResolverFn, depth int) (nodeRef, error) {
|
|
existing := s.getStem(existingRef.Index())
|
|
|
|
if int(existing.depth) >= StemSize*8 {
|
|
panic("splitStemValuesInsert: identical stems")
|
|
}
|
|
|
|
bitStem := existing.Stem[existing.depth/8] >> (7 - (existing.depth % 8)) & 1
|
|
nRef := s.newInternalRef(int(existing.depth))
|
|
nNode := s.getInternal(nRef.Index())
|
|
existing.depth++
|
|
// The existing stem's on-disk path is derived from its depth via
|
|
// extendPathToGroupLeaf. Promoting its depth changes that path, so the
|
|
// stem must be re-flushed at the new path; otherwise the old blob (at
|
|
// the prior path) gets overwritten by the new ancestor internal blob
|
|
// and the stem's data has no on-disk home.
|
|
existing.dirty = true
|
|
|
|
bitKey := newStem[nNode.depth/8] >> (7 - (nNode.depth % 8)) & 1
|
|
if bitKey == bitStem {
|
|
// Same direction — need deeper split
|
|
var child nodeRef
|
|
if bitStem == 0 {
|
|
nNode.left = existingRef
|
|
child = nNode.left
|
|
} else {
|
|
nNode.right = existingRef
|
|
child = nNode.right
|
|
}
|
|
newChild, err := s.insertValuesAtStem(child, newStem, values, resolver, depth+1)
|
|
if err != nil {
|
|
// Roll back the depth increment so a retry sees the same
|
|
// existing state and extracts bitStem at the correct offset.
|
|
// nRef itself leaks (no internal free-list), but the slot is
|
|
// unreachable from the tree and harmless.
|
|
existing.depth--
|
|
return nRef, err
|
|
}
|
|
if bitStem == 0 {
|
|
nNode.left = newChild
|
|
nNode.right = emptyRef
|
|
} else {
|
|
nNode.right = newChild
|
|
nNode.left = emptyRef
|
|
}
|
|
} else {
|
|
// Divergence — create new StemNode for the new values
|
|
newStemIdx := s.allocStem()
|
|
newSn := s.getStem(newStemIdx)
|
|
copy(newSn.Stem[:], newStem[:StemSize])
|
|
newSn.depth = nNode.depth + 1
|
|
newSn.mustRecompute = true
|
|
newSn.dirty = true
|
|
for i, v := range values {
|
|
if v != nil {
|
|
newSn.setValue(byte(i), v)
|
|
}
|
|
}
|
|
newStemRef := makeRef(kindStem, newStemIdx)
|
|
|
|
if bitStem == 0 {
|
|
nNode.left = existingRef
|
|
nNode.right = newStemRef
|
|
} else {
|
|
nNode.left = newStemRef
|
|
nNode.right = existingRef
|
|
}
|
|
}
|
|
return nRef, nil
|
|
}
|
|
|
|
func (s *nodeStore) Insert(key []byte, value []byte, resolver nodeResolverFn) error {
|
|
return s.InsertSingle(key[:StemSize], key[StemSize], value, resolver)
|
|
}
|
|
|
|
func (s *nodeStore) Get(key []byte, resolver nodeResolverFn) ([]byte, error) {
|
|
return s.GetValue(key[:StemSize], key[StemSize], resolver)
|
|
}
|
|
|
|
func (s *nodeStore) getHeight(ref nodeRef) int {
|
|
switch ref.Kind() {
|
|
case kindInternal:
|
|
node := s.getInternal(ref.Index())
|
|
lh := s.getHeight(node.left)
|
|
rh := s.getHeight(node.right)
|
|
if lh > rh {
|
|
return 1 + lh
|
|
}
|
|
return 1 + rh
|
|
case kindStem:
|
|
return 1
|
|
case kindEmpty:
|
|
return 0
|
|
default:
|
|
return 0
|
|
}
|
|
}
|