mirror of
https://github.com/ethereum/go-ethereum.git
synced 2026-06-12 01:41:36 +00:00
triedb/pathdb: bintrie snapshot generator
Adds generateBinTrieStems, the bintrie analogue of generateAccounts. It opens the bintrie via a sha256-aware bintrieDiskStore (the merkle disk store would always fail root validation against a binary node), iterates all leaves with binaryNodeIterator, aggregates them into per-stem builders, and emits one stem blob per stem boundary. Resume support is structural: ctx.marker is fed straight to the trie's NodeIterator, which uses binaryNodeIterator.seek (Commit 1) to position on the first leaf >= marker. Range proofs are deliberately skipped — the bintrie's Prove path is unimplemented and an iteration-only generation cycle is acceptable for a one-time startup cost. A bintrieGeneratorContext mirrors generatorContext but is much smaller: no holdable iterators (we walk the trie, not the existing flat state) and no two-tier marker (the bintrie key space is unified). checkAndFlushBin journals progress as a single 32-byte (stem || offset) key so resume can pick up mid-stem. generator.run dispatches on codec type so callers see a uniform lifecycle whether the underlying scheme is merkle or bintrie.
This commit is contained in:
parent
a1ff36d9e1
commit
0508d40aaf
3 changed files with 581 additions and 0 deletions
|
|
@ -130,6 +130,13 @@ func newGenerator(db ethdb.KeyValueStore, codec flatStateCodec, noBuild bool, pr
|
|||
}
|
||||
|
||||
// run starts the state snapshot generation in the background.
|
||||
//
|
||||
// The dispatch on codec type chooses between the merkle two-tier
|
||||
// account/storage iteration (`generate`) and the bintrie single-tier
|
||||
// stem iteration (`generateBintrie`). Both share the same lifecycle
|
||||
// (g.running, g.abort, g.done) and the same progress journal format,
|
||||
// so the only difference visible to callers of run/stop is which
|
||||
// background routine is launched.
|
||||
func (g *generator) run(root common.Hash) {
|
||||
if g.noBuild {
|
||||
log.Warn("Snapshot generation is not permitted")
|
||||
|
|
@ -140,6 +147,10 @@ func (g *generator) run(root common.Hash) {
|
|||
log.Warn("Paused the leftover generation cycle")
|
||||
}
|
||||
g.running = true
|
||||
if _, isBintrie := g.codec.(*bintrieFlatCodec); isBintrie {
|
||||
go g.generateBintrie(newBintrieGeneratorContext(root, g.progress, g.db))
|
||||
return
|
||||
}
|
||||
go g.generate(newGeneratorContext(root, g.progress, g.db, g.codec))
|
||||
}
|
||||
|
||||
|
|
|
|||
345
triedb/pathdb/generate_bintrie.go
Normal file
345
triedb/pathdb/generate_bintrie.go
Normal file
|
|
@ -0,0 +1,345 @@
|
|||
// Copyright 2026 The go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package pathdb
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/ethereum/go-ethereum/common"
|
||||
"github.com/ethereum/go-ethereum/core/rawdb"
|
||||
"github.com/ethereum/go-ethereum/core/types"
|
||||
"github.com/ethereum/go-ethereum/ethdb"
|
||||
"github.com/ethereum/go-ethereum/log"
|
||||
"github.com/ethereum/go-ethereum/trie/bintrie"
|
||||
"github.com/ethereum/go-ethereum/triedb/database"
|
||||
)
|
||||
|
||||
// bintrieDiskStore is the bintrie equivalent of diskStore (the merkle
|
||||
// reader used by the snapshot generator). The two differ in how
|
||||
// NodeReader validates the requested state root: the merkle store
|
||||
// hashes the on-disk account-trie root with keccak256, while the
|
||||
// bintrie root must be deserialized as a binary node and rehashed with
|
||||
// sha256 (the bintrie's native hash function). Sharing the merkle store
|
||||
// would always fail validation for a bintrie root.
|
||||
//
|
||||
// Once validated, both stores read trie nodes by path via
|
||||
// rawdb.ReadAccountTrieNode — the path-based key space is shared
|
||||
// between the two schemes (the bintrie sits in the same namespace as
|
||||
// the account trie because EIP-7864 unifies storage under accounts).
|
||||
type bintrieDiskStore struct {
|
||||
db ethdb.KeyValueStore
|
||||
}
|
||||
|
||||
// NodeReader validates that the bintrie root currently persisted at the
|
||||
// account-trie nil path matches the requested state root. The returned
|
||||
// reader is a plain path-based diskReader (the same one used by the
|
||||
// merkle generator) — only the validation logic differs.
|
||||
func (s *bintrieDiskStore) NodeReader(stateRoot common.Hash) (database.NodeReader, error) {
|
||||
// EmptyBinaryHash and the legacy EmptyRootHash are both treated as
|
||||
// "trie has no persisted root" — neither has a corresponding on-disk
|
||||
// node, and the bintrie itself short-circuits these cases inside
|
||||
// NewBinaryTrie. We accept them here without touching the disk.
|
||||
if stateRoot == (common.Hash{}) || stateRoot == types.EmptyBinaryHash || stateRoot == types.EmptyRootHash {
|
||||
return &diskReader{s.db}, nil
|
||||
}
|
||||
blob := rawdb.ReadAccountTrieNode(s.db, nil)
|
||||
if len(blob) == 0 {
|
||||
return nil, fmt.Errorf("bintrie state %x is not available (empty root node)", stateRoot)
|
||||
}
|
||||
// DeserializeNode rehashes via sha256 internally; the resulting node's
|
||||
// Hash() is the canonical bintrie root hash for the on-disk blob.
|
||||
root, err := bintrie.DeserializeNode(blob, 0)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("bintrie state %x: deserialize root: %w", stateRoot, err)
|
||||
}
|
||||
if got := root.Hash(); got != stateRoot {
|
||||
return nil, fmt.Errorf("bintrie state %x is not available (have %x)", stateRoot, got)
|
||||
}
|
||||
return &diskReader{s.db}, nil
|
||||
}
|
||||
|
||||
// bintrieGeneratorContext holds the state needed by a single bintrie
|
||||
// snapshot generation cycle. Unlike generatorContext (which manages two
|
||||
// holdable iterators over the on-disk merkle account/storage prefixes),
|
||||
// the bintrie path iterates the trie itself and never re-reads the
|
||||
// existing flat state. As a result the bintrie context is small: just
|
||||
// a write batch, the target root, and a single 32-byte progress marker
|
||||
// (the bintrie key (stem || offset) at which the previous run was
|
||||
// interrupted).
|
||||
//
|
||||
// The context is recreated on every generator restart, mirroring the
|
||||
// merkle generatorContext lifecycle.
|
||||
type bintrieGeneratorContext struct {
|
||||
root common.Hash // State root of the generation target
|
||||
marker []byte // Resume marker — a full 32-byte (stem || offset) key
|
||||
db ethdb.KeyValueStore // Key-value store containing trie nodes and stem blobs
|
||||
batch ethdb.Batch // Database batch for atomic writes
|
||||
logged time.Time // Timestamp of the last progress log message
|
||||
}
|
||||
|
||||
// newBintrieGeneratorContext initializes a fresh context bound to the
|
||||
// given target root, starting from the supplied resume marker. A nil or
|
||||
// zero-length marker means "start from the beginning of the trie".
|
||||
func newBintrieGeneratorContext(root common.Hash, marker []byte, db ethdb.KeyValueStore) *bintrieGeneratorContext {
|
||||
return &bintrieGeneratorContext{
|
||||
root: root,
|
||||
marker: marker,
|
||||
db: db,
|
||||
batch: db.NewBatch(),
|
||||
logged: time.Now(),
|
||||
}
|
||||
}
|
||||
|
||||
// close releases any resources held by the context. The bintrie path
|
||||
// holds no long-lived iterators outside of generateBinTrieStems (which
|
||||
// owns its iterator and releases it on return), so this is currently a
|
||||
// no-op. It exists symmetrically with generatorContext.close so future
|
||||
// resource additions have an obvious place to land.
|
||||
func (ctx *bintrieGeneratorContext) close() {}
|
||||
|
||||
// generateBinTrieStems regenerates the bintrie flat-state by iterating
|
||||
// the entire bintrie and emitting one stem blob per stem. The iterator
|
||||
// yields leaves in stem-then-offset order, so we accumulate offsets in a
|
||||
// per-stem builder and flush whenever the stem changes (and once more
|
||||
// at the end of iteration).
|
||||
//
|
||||
// Resume support is structural: ctx.marker — a 32-byte (stem || offset)
|
||||
// key — is fed straight to BinaryTrie.NodeIterator which positions on the
|
||||
// first leaf with key >= marker via binaryNodeIterator.seek (added in
|
||||
// Commit 1). Resuming inside a stem is permitted; we re-encode the stem
|
||||
// from scratch on each visit, so paying the disk cost twice for the
|
||||
// "interrupted" stem is preferable to introducing a "partial-stem"
|
||||
// resume protocol.
|
||||
//
|
||||
// Range proofs are deliberately not used here. The bintrie's Prove path
|
||||
// is not implemented yet, and an iteration-only generation cycle is
|
||||
// acceptable because regeneration is a one-time cost paid at startup.
|
||||
//
|
||||
// Code chunks (offsets 128..255) are written to the same stem blobs as
|
||||
// account header and storage offsets — it keeps the stem encoding
|
||||
// symmetric with the trie and means a future re-iteration regenerates
|
||||
// the entire stem layout in one pass.
|
||||
func (g *generator) generateBinTrieStems(ctx *bintrieGeneratorContext) error {
|
||||
// Open the bintrie via the same disk-backed reader that the merkle
|
||||
// generator uses. The diskStore reads trie nodes via
|
||||
// rawdb.ReadAccountTrieNode/ReadStorageTrieNode against the
|
||||
// already-namespaced verkle table (db.diskdb wraps it under
|
||||
// VerklePrefix), so the same accessor works for both schemes.
|
||||
tr, err := bintrie.NewBinaryTrie(ctx.root, &bintrieDiskStore{db: ctx.db})
|
||||
if err != nil {
|
||||
log.Info("Bintrie missing, snapshotting paused", "state", ctx.root, "err", err)
|
||||
return errMissingTrie
|
||||
}
|
||||
it, err := tr.NodeIterator(ctx.marker)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var (
|
||||
// currentStem is a freshly-allocated copy of the most recently
|
||||
// observed leaf's stem. We never alias the iterator's slice
|
||||
// because it can be invalidated on Next.
|
||||
currentStem []byte
|
||||
builder = newStemBuilder()
|
||||
)
|
||||
|
||||
// flushStem encodes the accumulated builder into a stem blob and
|
||||
// writes it to the batch (or deletes the key if the result is
|
||||
// empty — which can happen if every observed offset was nil, but
|
||||
// that should be impossible for a well-formed trie).
|
||||
flushStem := func() {
|
||||
if currentStem == nil || builder.empty() {
|
||||
return
|
||||
}
|
||||
blob := builder.encode()
|
||||
if blob == nil {
|
||||
rawdb.DeleteBinTrieStem(ctx.batch, currentStem)
|
||||
} else {
|
||||
rawdb.WriteBinTrieStem(ctx.batch, currentStem, blob)
|
||||
}
|
||||
builder.reset()
|
||||
// Bookkeeping: count one stem per emitted blob.
|
||||
g.stats.accounts++
|
||||
}
|
||||
|
||||
for it.Next(true) {
|
||||
if !it.Leaf() {
|
||||
continue
|
||||
}
|
||||
key := it.LeafKey()
|
||||
val := it.LeafBlob()
|
||||
|
||||
// A well-formed bintrie leaf is always (32-byte key, 32-byte value).
|
||||
// Defensive check so a malformed trie surfaces as an error rather
|
||||
// than corrupting the flat state.
|
||||
if len(key) != bintrie.StemSize+1 {
|
||||
return fmt.Errorf("bintrie leaf key has len %d, want %d", len(key), bintrie.StemSize+1)
|
||||
}
|
||||
if len(val) != stemBlobValueSize {
|
||||
return fmt.Errorf("bintrie leaf value has len %d, want %d", len(val), stemBlobValueSize)
|
||||
}
|
||||
|
||||
// Stem boundary detection: if we've moved to a new stem, persist
|
||||
// the previous one before starting a new builder.
|
||||
if currentStem != nil && !bytes.Equal(key[:bintrie.StemSize], currentStem) {
|
||||
flushStem()
|
||||
currentStem = nil
|
||||
}
|
||||
if currentStem == nil {
|
||||
currentStem = make([]byte, bintrie.StemSize)
|
||||
copy(currentStem, key[:bintrie.StemSize])
|
||||
}
|
||||
// builder.set takes an owning copy internally so it's safe to
|
||||
// hand it the iterator's transient value slice.
|
||||
builder.set(key[bintrie.StemSize], val)
|
||||
|
||||
g.stats.slots++
|
||||
g.stats.storage += common.StorageSize(1 + bintrie.StemSize + len(val))
|
||||
|
||||
// Use the FULL leaf key (stem || offset) as the progress marker
|
||||
// so an interrupted run can resume mid-stem. checkAndFlushBin
|
||||
// takes an owning copy because the iterator's key may be
|
||||
// invalidated on the next call.
|
||||
marker := make([]byte, len(key))
|
||||
copy(marker, key)
|
||||
if err := g.checkAndFlushBin(ctx, marker); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := it.Error(); err != nil {
|
||||
return err
|
||||
}
|
||||
// Flush the trailing stem (the loop only flushes on transitions).
|
||||
flushStem()
|
||||
return nil
|
||||
}
|
||||
|
||||
// checkAndFlushBin is the bintrie analogue of checkAndFlush. It saves
|
||||
// progress as a single 32-byte (stem || offset) key and writes the
|
||||
// batch when it exceeds IdealBatchSize, or when an abort signal is
|
||||
// received.
|
||||
//
|
||||
// Unlike the merkle variant, there are no snapshot iterators to reopen
|
||||
// here — the bintrie path iterates the trie itself, and the trie
|
||||
// iterator manages its own resource lifetime.
|
||||
func (g *generator) checkAndFlushBin(ctx *bintrieGeneratorContext, current []byte) error {
|
||||
var abort chan struct{}
|
||||
select {
|
||||
case abort = <-g.abort:
|
||||
default:
|
||||
}
|
||||
if ctx.batch.ValueSize() > ethdb.IdealBatchSize || abort != nil {
|
||||
if bytes.Compare(current, g.progress) < 0 {
|
||||
log.Error("Bintrie generator went backwards",
|
||||
"current", fmt.Sprintf("%x", current),
|
||||
"genMarker", fmt.Sprintf("%x", g.progress))
|
||||
}
|
||||
// Persist progress regardless of whether the batch is empty —
|
||||
// it may be that all observed stems were already on disk and
|
||||
// nothing actually changed.
|
||||
journalProgress(ctx.batch, current, g.stats)
|
||||
|
||||
if err := ctx.batch.Write(); err != nil {
|
||||
return err
|
||||
}
|
||||
ctx.batch.Reset()
|
||||
|
||||
g.lock.Lock()
|
||||
g.progress = current
|
||||
g.lock.Unlock()
|
||||
|
||||
if abort != nil {
|
||||
g.stats.log("Aborting bintrie snapshot generation", ctx.root, g.progress)
|
||||
return newAbortErr(abort)
|
||||
}
|
||||
}
|
||||
if time.Since(ctx.logged) > 8*time.Second {
|
||||
g.stats.log("Generating bintrie snapshot", ctx.root, g.progress)
|
||||
ctx.logged = time.Now()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// generateBintrie is the bintrie analogue of the merkle `generate`
|
||||
// background loop. The shapes mirror each other so the lifecycle and
|
||||
// shutdown protocol look identical to callers (`run` / `stop`):
|
||||
//
|
||||
// 1. Persist the initial progress marker if this is a fresh run
|
||||
// (so a crash after the first batch can find the genesis marker
|
||||
// during recovery).
|
||||
// 2. Drive generateBinTrieStems to completion (or until an abort).
|
||||
// 3. On clean completion, write the "done" sentinel marker, log a
|
||||
// summary, and close g.done.
|
||||
// 4. On abort (internal error or external signal), close the abort
|
||||
// channel and return.
|
||||
func (g *generator) generateBintrie(ctx *bintrieGeneratorContext) {
|
||||
g.stats.log("Resuming bintrie snapshot generation", ctx.root, g.progress)
|
||||
defer ctx.close()
|
||||
|
||||
if len(g.progress) == 0 {
|
||||
batch := ctx.db.NewBatch()
|
||||
rawdb.WriteSnapshotRoot(batch, ctx.root)
|
||||
journalProgress(batch, g.progress, g.stats)
|
||||
if err := batch.Write(); err != nil {
|
||||
log.Crit("Failed to write initialized bintrie state marker", "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
var abort chan struct{}
|
||||
if err := g.generateBinTrieStems(ctx); err != nil {
|
||||
var aerr *abortErr
|
||||
if errors.As(err, &aerr) {
|
||||
abort = aerr.abort
|
||||
}
|
||||
// Internal error: wait for an external abort signal so the
|
||||
// caller's stop() invocation can synchronize.
|
||||
if abort == nil {
|
||||
abort = <-g.abort
|
||||
}
|
||||
close(abort)
|
||||
return
|
||||
}
|
||||
|
||||
// Successful completion: write the nil "done" marker so subsequent
|
||||
// loads know the snapshot is complete.
|
||||
journalProgress(ctx.batch, nil, g.stats)
|
||||
if err := ctx.batch.Write(); err != nil {
|
||||
log.Error("Failed to flush bintrie batch", "err", err)
|
||||
abort = <-g.abort
|
||||
close(abort)
|
||||
return
|
||||
}
|
||||
ctx.batch.Reset()
|
||||
|
||||
log.Info("Generated bintrie snapshot",
|
||||
"stems", g.stats.accounts,
|
||||
"leaves", g.stats.slots,
|
||||
"storage", g.stats.storage,
|
||||
"elapsed", common.PrettyDuration(time.Since(g.stats.start)))
|
||||
|
||||
g.lock.Lock()
|
||||
g.progress = nil
|
||||
g.lock.Unlock()
|
||||
close(g.done)
|
||||
|
||||
// Block until the eventual stop() so the caller can wait for us.
|
||||
abort = <-g.abort
|
||||
close(abort)
|
||||
}
|
||||
225
triedb/pathdb/generate_bintrie_test.go
Normal file
225
triedb/pathdb/generate_bintrie_test.go
Normal file
|
|
@ -0,0 +1,225 @@
|
|||
// Copyright 2026 The go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package pathdb
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/ethereum/go-ethereum/common"
|
||||
"github.com/ethereum/go-ethereum/core/rawdb"
|
||||
"github.com/ethereum/go-ethereum/core/types"
|
||||
"github.com/ethereum/go-ethereum/ethdb"
|
||||
"github.com/ethereum/go-ethereum/trie/bintrie"
|
||||
"github.com/holiman/uint256"
|
||||
)
|
||||
|
||||
// buildTestBintrie constructs a small in-memory bintrie containing two
|
||||
// accounts and one storage slot, persists its serialized nodes into the
|
||||
// supplied key-value store under the standard pathdb account-trie key
|
||||
// space (which is what the bintrie reads back via diskStore), and returns
|
||||
// the resulting state root.
|
||||
//
|
||||
// This helper sidesteps triedb.Database to avoid an import cycle: pathdb
|
||||
// is a child of triedb, so the test cannot construct a triedb.Database
|
||||
// here. Instead it manually persists the nodes returned by
|
||||
// bintrie.Commit, mirroring what writeNodes would do in production.
|
||||
func buildTestBintrie(t *testing.T, db ethdb.Database) (common.Hash, []addrAcct) {
|
||||
t.Helper()
|
||||
|
||||
// Use a memory-backed NodeDatabase for the empty starting trie. The
|
||||
// trie's nodeResolver returns nil for unknown hashes, which matches
|
||||
// the empty-trie semantics expected by NewBinaryTrie.
|
||||
tr, err := bintrie.NewBinaryTrie(types.EmptyBinaryHash, &diskStore{db: db})
|
||||
if err != nil {
|
||||
t.Fatalf("new bintrie: %v", err)
|
||||
}
|
||||
|
||||
addr1 := common.HexToAddress("0x1111111111111111111111111111111111111111")
|
||||
addr2 := common.HexToAddress("0x2222222222222222222222222222222222222222")
|
||||
slot := common.HexToHash("0x0000000000000000000000000000000000000000000000000000000000000007")
|
||||
slotValue := bytes.Repeat([]byte{0x77}, 32)
|
||||
|
||||
if err := tr.UpdateAccount(addr1, &types.StateAccount{
|
||||
Nonce: 1,
|
||||
Balance: uint256.NewInt(100),
|
||||
CodeHash: types.EmptyCodeHash[:],
|
||||
}, 0); err != nil {
|
||||
t.Fatalf("update account 1: %v", err)
|
||||
}
|
||||
if err := tr.UpdateAccount(addr2, &types.StateAccount{
|
||||
Nonce: 2,
|
||||
Balance: uint256.NewInt(200),
|
||||
CodeHash: types.EmptyCodeHash[:],
|
||||
}, 0); err != nil {
|
||||
t.Fatalf("update account 2: %v", err)
|
||||
}
|
||||
if err := tr.UpdateStorage(addr1, slot[:], slotValue); err != nil {
|
||||
t.Fatalf("update storage: %v", err)
|
||||
}
|
||||
root, nodes := tr.Commit(false)
|
||||
|
||||
// Persist all collected nodes via the standard account-trie path
|
||||
// scheme accessor — the bintrie sits in the same key space as the
|
||||
// account trie because there are no per-account storage tries in
|
||||
// EIP-7864.
|
||||
batch := db.NewBatch()
|
||||
for path, node := range nodes.Nodes {
|
||||
if node.IsDeleted() {
|
||||
rawdb.DeleteAccountTrieNode(batch, []byte(path))
|
||||
continue
|
||||
}
|
||||
rawdb.WriteAccountTrieNode(batch, []byte(path), node.Blob)
|
||||
}
|
||||
if err := batch.Write(); err != nil {
|
||||
t.Fatalf("flush trie nodes: %v", err)
|
||||
}
|
||||
|
||||
return root, []addrAcct{
|
||||
{addr: addr1, hasStorage: true, slot: slot, slotVal: slotValue},
|
||||
{addr: addr2, hasStorage: false},
|
||||
}
|
||||
}
|
||||
|
||||
// addrAcct describes a test account so the assertions phase can re-derive
|
||||
// the bintrie keys it should find on disk.
|
||||
type addrAcct struct {
|
||||
addr common.Address
|
||||
hasStorage bool
|
||||
slot common.Hash
|
||||
slotVal []byte
|
||||
}
|
||||
|
||||
// runTestBintrieGenerator wires up a generator with the bintrie codec and
|
||||
// drives generateBinTrieStems to completion. It returns the codec and the
|
||||
// underlying db so the assertions can read back stem blobs.
|
||||
func runTestBintrieGenerator(t *testing.T, db ethdb.Database, root common.Hash, marker []byte) {
|
||||
t.Helper()
|
||||
|
||||
codec := newBintrieFlatCodec(db)
|
||||
gen := &generator{
|
||||
db: db,
|
||||
codec: codec,
|
||||
stats: &generatorStats{start: time.Now()},
|
||||
abort: make(chan chan struct{}, 1),
|
||||
done: make(chan struct{}),
|
||||
}
|
||||
ctx := newBintrieGeneratorContext(root, marker, db)
|
||||
defer ctx.close()
|
||||
|
||||
if err := gen.generateBinTrieStems(ctx); err != nil {
|
||||
t.Fatalf("generateBinTrieStems: %v", err)
|
||||
}
|
||||
if err := ctx.batch.Write(); err != nil {
|
||||
t.Fatalf("final batch write: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestBintrieGeneratorRebuildsStems verifies the happy-path:
|
||||
// - Build a small bintrie with two accounts and one storage slot.
|
||||
// - Run the generator on its root.
|
||||
// - Read back the stem blobs and check every offset round-trips.
|
||||
//
|
||||
// This is the primary "the generator works" test.
|
||||
func TestBintrieGeneratorRebuildsStems(t *testing.T) {
|
||||
db := rawdb.NewMemoryDatabase()
|
||||
root, accounts := buildTestBintrie(t, db)
|
||||
|
||||
// Sanity-check that the bintrie isn't trivially empty.
|
||||
if root == (common.Hash{}) || root == types.EmptyBinaryHash {
|
||||
t.Fatal("test bintrie produced an empty root")
|
||||
}
|
||||
|
||||
runTestBintrieGenerator(t, db, root, nil)
|
||||
|
||||
// Each test account must have its BasicData (offset 0) and CodeHash
|
||||
// (offset 1) entries on disk after generation.
|
||||
for _, a := range accounts {
|
||||
stem := bintrie.GetBinaryTreeKeyBasicData(a.addr)[:bintrie.StemSize]
|
||||
blob := rawdb.ReadBinTrieStem(db, stem)
|
||||
if len(blob) == 0 {
|
||||
t.Errorf("addr %x: stem blob missing after generation", a.addr)
|
||||
continue
|
||||
}
|
||||
basic, err := extractStemOffset(blob, bintrie.BasicDataLeafKey)
|
||||
if err != nil || len(basic) != 32 {
|
||||
t.Errorf("addr %x: BasicData missing/invalid (err=%v len=%d)", a.addr, err, len(basic))
|
||||
}
|
||||
codeHash, err := extractStemOffset(blob, bintrie.CodeHashLeafKey)
|
||||
if err != nil || !bytes.Equal(codeHash, types.EmptyCodeHash[:]) {
|
||||
t.Errorf("addr %x: CodeHash mismatch (err=%v got=%x)", a.addr, err, codeHash)
|
||||
}
|
||||
}
|
||||
|
||||
// The storage slot must be present at its derived stem (which may
|
||||
// equal the account's BasicData stem for header slots, or differ for
|
||||
// out-of-header slots — slot 7 is in-header so we expect the same
|
||||
// stem as BasicData).
|
||||
a := accounts[0]
|
||||
storageKey := bintrie.GetBinaryTreeKeyStorageSlot(a.addr, a.slot[:])
|
||||
storageBlob := rawdb.ReadBinTrieStem(db, storageKey[:bintrie.StemSize])
|
||||
if len(storageBlob) == 0 {
|
||||
t.Fatal("storage stem blob missing")
|
||||
}
|
||||
got, err := extractStemOffset(storageBlob, storageKey[bintrie.StemSize])
|
||||
if err != nil {
|
||||
t.Fatalf("extract storage offset: %v", err)
|
||||
}
|
||||
if !bytes.Equal(got, a.slotVal) {
|
||||
t.Errorf("storage value mismatch: got %x want %x", got, a.slotVal)
|
||||
}
|
||||
}
|
||||
|
||||
// TestBintrieGeneratorResume verifies the resume path: a generator
|
||||
// started with a non-zero marker should produce on-disk stem blobs
|
||||
// covering only the keys at or after the marker. We pick the marker as
|
||||
// the SECOND populated stem in the trie so the assertions can verify
|
||||
// the first stem was skipped and the second-onwards stems were emitted.
|
||||
//
|
||||
// This is a thinner check than the rebuild test because the iterator's
|
||||
// resume contract is exercised more thoroughly by the iterator-level
|
||||
// tests in trie/bintrie/iterator_test.go — here we just confirm the
|
||||
// generator wires through to it.
|
||||
func TestBintrieGeneratorResume(t *testing.T) {
|
||||
db := rawdb.NewMemoryDatabase()
|
||||
root, accounts := buildTestBintrie(t, db)
|
||||
|
||||
// Pick the larger of the two account stems as the resume marker;
|
||||
// after generation, only the larger stem should appear on disk.
|
||||
stem1 := bintrie.GetBinaryTreeKeyBasicData(accounts[0].addr)[:bintrie.StemSize]
|
||||
stem2 := bintrie.GetBinaryTreeKeyBasicData(accounts[1].addr)[:bintrie.StemSize]
|
||||
larger := stem1
|
||||
smaller := stem2
|
||||
if bytes.Compare(stem1, stem2) < 0 {
|
||||
larger, smaller = stem2, stem1
|
||||
}
|
||||
|
||||
// Marker must be a 32-byte key (stem || offset). Offset 0 picks the
|
||||
// BasicData of the larger stem.
|
||||
marker := make([]byte, 32)
|
||||
copy(marker, larger)
|
||||
|
||||
runTestBintrieGenerator(t, db, root, marker)
|
||||
|
||||
if got := rawdb.ReadBinTrieStem(db, smaller); len(got) != 0 {
|
||||
t.Errorf("smaller stem should have been skipped by resume marker, got %x", got)
|
||||
}
|
||||
if got := rawdb.ReadBinTrieStem(db, larger); len(got) == 0 {
|
||||
t.Errorf("larger stem should have been generated after resume marker")
|
||||
}
|
||||
}
|
||||
Loading…
Reference in a new issue