triedb/pathdb: fix mid-stem generator resume via mergeStemBlob RMW

Addresses review finding C1.

Before this commit, flushStem in generateBinTrieStems used
builder.encode() to overwrite the on-disk stem blob unconditionally.
When a crash+restart interrupted generation mid-stem (e.g., at offset 3
of stemA), the resume iterator positioned at stemA||3, the builder
accumulated only offsets 3+, and flushStem overwrote the disk blob with
a partial result — silently losing offsets 0, 1, 2 that were written in
the prior pass.

Fix: make flushStem a read-modify-write. It now reads the existing
on-disk stem blob (if any), converts the builder's accumulated offsets
to []stemOffsetValue via a new toOffsetValues() helper, and merges them
via the existing mergeStemBlob function. The merge semantics are
"builder values win" — new offsets overwrite their existing counterparts,
and gaps are filled from the prior blob. This makes the RMW idempotent
across resume cycles: the same stem can be re-walked from any midpoint
and the final disk blob always contains the union of all passes.

New helper: stemBuilder.toOffsetValues() converts the builder's
populated bitmap entries into a []stemOffsetValue slice suitable for
mergeStemBlob. ~20 LOC in stem_blob.go.

Tests:
  * TestBintrieGeneratorResumeMidStem — pre-seeds disk with a partial
    stem (offsets 0, 1), resumes generator at offset 1, asserts all
    offsets survive including the pre-seeded offset 0. Before the fix
    this test fails with "BasicData lost after mid-stem resume".
  * TestBintrieGeneratorResumeStemBoundary — renamed from the original
    TestBintrieGeneratorResume, unchanged behavior (stem-boundary
    resume).
This commit is contained in:
CPerezz 2026-04-09 11:22:56 +02:00
parent 78f785e4ff
commit 45de1c3cc1
No known key found for this signature in database
GPG key ID: 62045F34B97177DD
3 changed files with 147 additions and 26 deletions

View file

@ -123,10 +123,12 @@ func (ctx *bintrieGeneratorContext) close() {}
// Resume support is structural: ctx.marker — a 32-byte (stem || offset)
// key — is fed straight to BinaryTrie.NodeIterator which positions on the
// first leaf with key >= marker via binaryNodeIterator.seek (added in
// Commit 1). Resuming inside a stem is permitted; we re-encode the stem
// from scratch on each visit, so paying the disk cost twice for the
// "interrupted" stem is preferable to introducing a "partial-stem"
// resume protocol.
// Commit 1). Resuming inside a stem is safe because flushStem performs a
// read-modify-write: the builder's new offsets (from the resumed walk)
// are merged with the existing on-disk blob (from the prior pass). If
// the marker is at offset 3 of stemA, the resume processes offsets 3..N
// and the merge preserves offsets 0..2 from disk. One extra disk read
// per flushStem (the RMW) is negligible compared to the walk cost.
//
// Range proofs are deliberately not used here. The bintrie's Prove path
// is not implemented yet, and an iteration-only generation cycle is
@ -160,19 +162,36 @@ func (g *generator) generateBinTrieStems(ctx *bintrieGeneratorContext) error {
builder = newStemBuilder()
)
// flushStem encodes the accumulated builder into a stem blob and
// writes it to the batch (or deletes the key if the result is
// empty — which can happen if every observed offset was nil, but
// that should be impossible for a well-formed trie).
// flushStem performs a read-modify-write on the stem being accumulated:
// it reads the existing on-disk stem blob (if any), merges in the
// builder's new offsets (new values win over existing), and writes the
// merged result back. This makes mid-stem resume safe: if a prior pass
// wrote offsets 0..2 and the current pass (after resuming at offset 3)
// only has offsets 3..4 in the builder, the merge preserves 0..2 from
// disk and adds 3..4 — no data loss.
//
// Without this RMW, a mid-stem resume would overwrite the existing disk
// blob with a partial one, silently dropping the earlier offsets. This
// was bug C1 identified in the PR review.
flushStem := func() {
if currentStem == nil || builder.empty() {
return
}
blob := builder.encode()
if blob == nil {
existing := rawdb.ReadBinTrieStem(ctx.db, currentStem)
writes := builder.toOffsetValues()
merged, err := mergeStemBlob(existing, writes)
if err != nil {
// Corruption in the existing blob. Log and fall back to the
// builder content alone — at least the offsets from this pass
// land. A7 tightens this to a first-class error propagation.
log.Error("Bintrie generator: merge stem blob failed, writing builder only",
"stem", fmt.Sprintf("%x", currentStem), "err", err)
merged = builder.encode()
}
if merged == nil {
rawdb.DeleteBinTrieStem(ctx.batch, currentStem)
} else {
rawdb.WriteBinTrieStem(ctx.batch, currentStem, blob)
rawdb.WriteBinTrieStem(ctx.batch, currentStem, merged)
}
builder.reset()
// Bookkeeping: count one stem per emitted blob.

View file

@ -185,22 +185,13 @@ func TestBintrieGeneratorRebuildsStems(t *testing.T) {
}
}
// TestBintrieGeneratorResume verifies the resume path: a generator
// started with a non-zero marker should produce on-disk stem blobs
// covering only the keys at or after the marker. We pick the marker as
// the SECOND populated stem in the trie so the assertions can verify
// the first stem was skipped and the second-onwards stems were emitted.
//
// This is a thinner check than the rebuild test because the iterator's
// resume contract is exercised more thoroughly by the iterator-level
// tests in trie/bintrie/iterator_test.go — here we just confirm the
// generator wires through to it.
func TestBintrieGeneratorResume(t *testing.T) {
// TestBintrieGeneratorResumeStemBoundary verifies that a generator
// started from a stem-boundary marker (stem || offset 0) correctly
// generates only the stems at or after the marker.
func TestBintrieGeneratorResumeStemBoundary(t *testing.T) {
db := rawdb.NewMemoryDatabase()
root, accounts := buildTestBintrie(t, db)
// Pick the larger of the two account stems as the resume marker;
// after generation, only the larger stem should appear on disk.
stem1 := bintrie.GetBinaryTreeKeyBasicData(accounts[0].addr)[:bintrie.StemSize]
stem2 := bintrie.GetBinaryTreeKeyBasicData(accounts[1].addr)[:bintrie.StemSize]
larger := stem1
@ -209,8 +200,6 @@ func TestBintrieGeneratorResume(t *testing.T) {
larger, smaller = stem2, stem1
}
// Marker must be a 32-byte key (stem || offset). Offset 0 picks the
// BasicData of the larger stem.
marker := make([]byte, 32)
copy(marker, larger)
@ -223,3 +212,93 @@ func TestBintrieGeneratorResume(t *testing.T) {
t.Errorf("larger stem should have been generated after resume marker")
}
}
// TestBintrieGeneratorResumeMidStem is the regression test for review
// finding C1 (mid-stem resume drops earlier offsets). Before A3's fix,
// flushStem OVERWROTE the on-disk stem blob with only the offsets
// accumulated after the resume point. Offsets from a prior pass that
// were already on disk were silently lost.
//
// The test simulates a two-pass generation:
//
// 1. Pre-seed the disk with a stem blob containing offsets 0 and 1
// (simulating what a prior pass wrote before being interrupted).
// 2. Run the generator with marker = stem||1 (resume INSIDE the stem,
// past offset 0).
// 3. After the generator completes, verify that the on-disk blob
// contains ALL offsets (0, 1, and everything else the trie has)
// — not just the offsets from the resumed walk.
//
// Before A3: step 3 would show only the post-marker offsets.
func TestBintrieGeneratorResumeMidStem(t *testing.T) {
db := rawdb.NewMemoryDatabase()
root, accounts := buildTestBintrie(t, db)
// Pick addr1 (the one with storage). It has BasicData (offset 0),
// CodeHash (offset 1), and storage slot 7 at offset 64+7=71.
a := accounts[0]
stem := bintrie.GetBinaryTreeKeyBasicData(a.addr)[:bintrie.StemSize]
// Step 1: Pre-seed the disk with a partial stem blob containing
// only offsets 0 and 1 — as if a prior generator pass wrote them
// before being interrupted.
preSeed := newStemBuilder()
preSeed.set(bintrie.BasicDataLeafKey, bytes.Repeat([]byte{0xAA}, 32))
preSeed.set(bintrie.CodeHashLeafKey, bytes.Repeat([]byte{0xBB}, 32))
rawdb.WriteBinTrieStem(db, stem, preSeed.encode())
// Step 2: Resume from offset 1 — the generator should pick up at
// offset 1 of this stem and walk forward. The builder will
// accumulate only offset 1 + storage offset from the trie walk.
// The RMW in flushStem must merge them with the pre-seeded disk
// blob to preserve offset 0.
marker := make([]byte, 32)
copy(marker[:bintrie.StemSize], stem)
marker[bintrie.StemSize] = bintrie.CodeHashLeafKey // resume at offset 1
runTestBintrieGenerator(t, db, root, marker)
// Step 3: After the full run, verify the disk blob has ALL offsets.
blob := rawdb.ReadBinTrieStem(db, stem)
if len(blob) == 0 {
t.Fatal("stem blob missing after mid-stem resume")
}
// Offset 0 (BasicData): must survive the mid-stem resume because
// the RMW merged the builder's new content with the existing disk
// blob. Before A3, this offset was silently dropped.
basic, err := extractStemOffset(blob, bintrie.BasicDataLeafKey)
if err != nil {
t.Fatalf("extract BasicData: %v", err)
}
if len(basic) != 32 {
t.Fatalf("BasicData lost after mid-stem resume (A3 regression): got len=%d, want 32", len(basic))
}
// Offset 1 (CodeHash): the generator walked this offset (it's at
// the marker), so the trie's authoritative value should overwrite
// the pre-seeded one.
code, err := extractStemOffset(blob, bintrie.CodeHashLeafKey)
if err != nil {
t.Fatalf("extract CodeHash: %v", err)
}
if len(code) != 32 {
t.Fatalf("CodeHash missing after resume: got len=%d", len(code))
}
// Storage slot must also be present (the generator walked it as
// part of the full stem traversal).
storageKey := bintrie.GetBinaryTreeKeyStorageSlot(a.addr, a.slot[:])
storageOffset := storageKey[bintrie.StemSize]
storageStem := storageKey[:bintrie.StemSize]
if bytes.Equal(storageStem, stem) {
// Storage is at the same stem (header slot) — verify it's in the blob.
storageVal, err := extractStemOffset(blob, storageOffset)
if err != nil {
t.Fatalf("extract storage: %v", err)
}
if !bytes.Equal(storageVal, a.slotVal) {
t.Errorf("storage value: got %x, want %x", storageVal, a.slotVal)
}
}
}

View file

@ -267,6 +267,29 @@ func (b *stemBuilder) reset() {
b.values = [stemBlobBitmapBits][]byte{}
}
// toOffsetValues converts the builder's populated entries into a slice
// of (offset, value) pairs suitable for passing to mergeStemBlob. Only
// offsets with non-nil values are emitted; cleared (nil-value) offsets
// are skipped since their absence in the merge input leaves the
// existing blob's value intact — which is the correct behavior for the
// generator's RMW pattern where the builder holds only the new writes.
func (b *stemBuilder) toOffsetValues() []stemOffsetValue {
count := bitmapPopcount(b.bitmap)
if count == 0 {
return nil
}
out := make([]stemOffsetValue, 0, count)
for offset := range stemBlobBitmapBits {
if b.values[offset] != nil {
out = append(out, stemOffsetValue{
Offset: byte(offset),
Value: b.values[offset],
})
}
}
return out
}
// stemOffsetValue is a single (offset, value) pair passed to mergeStemBlob.
// A nil Value clears the offset.
type stemOffsetValue struct {