triedb/pathdb: fix mid-stem generator resume via mergeStemBlob RMW

Addresses review finding C1. Before this commit, flushStem in generateBinTrieStems used builder.encode() to overwrite the on-disk stem blob unconditionally. When a crash+restart interrupted generation mid-stem (e.g., at offset 3 of stemA), the resume iterator positioned at stemA||3, the builder accumulated only offsets 3+, and flushStem overwrote the disk blob with a partial result — silently losing offsets 0, 1, 2 that were written in the prior pass. Fix: make flushStem a read-modify-write. It now reads the existing on-disk stem blob (if any), converts the builder's accumulated offsets to []stemOffsetValue via a new toOffsetValues() helper, and merges them via the existing mergeStemBlob function. The merge semantics are "builder values win" — new offsets overwrite their existing counterparts, and gaps are filled from the prior blob. This makes the RMW idempotent across resume cycles: the same stem can be re-walked from any midpoint and the final disk blob always contains the union of all passes. New helper: stemBuilder.toOffsetValues() converts the builder's populated bitmap entries into a []stemOffsetValue slice suitable for mergeStemBlob. ~20 LOC in stem_blob.go. Tests: * TestBintrieGeneratorResumeMidStem — pre-seeds disk with a partial stem (offsets 0, 1), resumes generator at offset 1, asserts all offsets survive including the pre-seeded offset 0. Before the fix this test fails with "BasicData lost after mid-stem resume". * TestBintrieGeneratorResumeStemBoundary — renamed from the original TestBintrieGeneratorResume, unchanged behavior (stem-boundary resume).
2026-06-12 01:41:36 +00:00 · 2026-04-09 11:22:56 +02:00 · 2026-04-09 11:22:56 +02:00 · 45de1c3cc1
commit 45de1c3cc1
parent 78f785e4ff
3 changed files with 147 additions and 26 deletions
--- a/triedb/pathdb/generate_bintrie.go
+++ b/triedb/pathdb/generate_bintrie.go
@ -123,10 +123,12 @@ func (ctx *bintrieGeneratorContext) close() {}
 // Resume support is structural: ctx.marker — a 32-byte (stem || offset)
 // key — is fed straight to BinaryTrie.NodeIterator which positions on the
 // first leaf with key >= marker via binaryNodeIterator.seek (added in
-// Commit 1). Resuming inside a stem is permitted; we re-encode the stem
-// from scratch on each visit, so paying the disk cost twice for the
-// "interrupted" stem is preferable to introducing a "partial-stem"
-// resume protocol.
+// Commit 1). Resuming inside a stem is safe because flushStem performs a
+// read-modify-write: the builder's new offsets (from the resumed walk)
+// are merged with the existing on-disk blob (from the prior pass). If
+// the marker is at offset 3 of stemA, the resume processes offsets 3..N
+// and the merge preserves offsets 0..2 from disk. One extra disk read
+// per flushStem (the RMW) is negligible compared to the walk cost.
 //
 // Range proofs are deliberately not used here. The bintrie's Prove path
 // is not implemented yet, and an iteration-only generation cycle is
@ -160,19 +162,36 @@ func (g *generator) generateBinTrieStems(ctx *bintrieGeneratorContext) error {
 		builder     = newStemBuilder()
 	)

-	// flushStem encodes the accumulated builder into a stem blob and
-	// writes it to the batch (or deletes the key if the result is
-	// empty — which can happen if every observed offset was nil, but
-	// that should be impossible for a well-formed trie).
+	// flushStem performs a read-modify-write on the stem being accumulated:
+	// it reads the existing on-disk stem blob (if any), merges in the
+	// builder's new offsets (new values win over existing), and writes the
+	// merged result back. This makes mid-stem resume safe: if a prior pass
+	// wrote offsets 0..2 and the current pass (after resuming at offset 3)
+	// only has offsets 3..4 in the builder, the merge preserves 0..2 from
+	// disk and adds 3..4 — no data loss.
+	//
+	// Without this RMW, a mid-stem resume would overwrite the existing disk
+	// blob with a partial one, silently dropping the earlier offsets. This
+	// was bug C1 identified in the PR review.
 	flushStem := func() {
 		if currentStem == nil || builder.empty() {
 			return
 		}
-		blob := builder.encode()
-		if blob == nil {
+		existing := rawdb.ReadBinTrieStem(ctx.db, currentStem)
+		writes := builder.toOffsetValues()
+		merged, err := mergeStemBlob(existing, writes)
+		if err != nil {
+			// Corruption in the existing blob. Log and fall back to the
+			// builder content alone — at least the offsets from this pass
+			// land. A7 tightens this to a first-class error propagation.
+			log.Error("Bintrie generator: merge stem blob failed, writing builder only",
+				"stem", fmt.Sprintf("%x", currentStem), "err", err)
+			merged = builder.encode()
+		}
+		if merged == nil {
 			rawdb.DeleteBinTrieStem(ctx.batch, currentStem)
 		} else {
-			rawdb.WriteBinTrieStem(ctx.batch, currentStem, blob)
+			rawdb.WriteBinTrieStem(ctx.batch, currentStem, merged)
 		}
 		builder.reset()
 		// Bookkeeping: count one stem per emitted blob.
--- a/triedb/pathdb/generate_bintrie_test.go
+++ b/triedb/pathdb/generate_bintrie_test.go
@ -185,22 +185,13 @@ func TestBintrieGeneratorRebuildsStems(t *testing.T) {
 	}
 }

-// TestBintrieGeneratorResume verifies the resume path: a generator
-// started with a non-zero marker should produce on-disk stem blobs
-// covering only the keys at or after the marker. We pick the marker as
-// the SECOND populated stem in the trie so the assertions can verify
-// the first stem was skipped and the second-onwards stems were emitted.
-//
-// This is a thinner check than the rebuild test because the iterator's
-// resume contract is exercised more thoroughly by the iterator-level
-// tests in trie/bintrie/iterator_test.go — here we just confirm the
-// generator wires through to it.
-func TestBintrieGeneratorResume(t *testing.T) {
+// TestBintrieGeneratorResumeStemBoundary verifies that a generator
+// started from a stem-boundary marker (stem || offset 0) correctly
+// generates only the stems at or after the marker.
+func TestBintrieGeneratorResumeStemBoundary(t *testing.T) {
 	db := rawdb.NewMemoryDatabase()
 	root, accounts := buildTestBintrie(t, db)

-	// Pick the larger of the two account stems as the resume marker;
-	// after generation, only the larger stem should appear on disk.
 	stem1 := bintrie.GetBinaryTreeKeyBasicData(accounts[0].addr)[:bintrie.StemSize]
 	stem2 := bintrie.GetBinaryTreeKeyBasicData(accounts[1].addr)[:bintrie.StemSize]
 	larger := stem1
@ -209,8 +200,6 @@ func TestBintrieGeneratorResume(t *testing.T) {
 		larger, smaller = stem2, stem1
 	}

-	// Marker must be a 32-byte key (stem || offset). Offset 0 picks the
-	// BasicData of the larger stem.
 	marker := make([]byte, 32)
 	copy(marker, larger)

@ -223,3 +212,93 @@ func TestBintrieGeneratorResume(t *testing.T) {
 		t.Errorf("larger stem should have been generated after resume marker")
 	}
 }
+
+// TestBintrieGeneratorResumeMidStem is the regression test for review
+// finding C1 (mid-stem resume drops earlier offsets). Before A3's fix,
+// flushStem OVERWROTE the on-disk stem blob with only the offsets
+// accumulated after the resume point. Offsets from a prior pass that
+// were already on disk were silently lost.
+//
+// The test simulates a two-pass generation:
+//
+//  1. Pre-seed the disk with a stem blob containing offsets 0 and 1
+//     (simulating what a prior pass wrote before being interrupted).
+//  2. Run the generator with marker = stem||1 (resume INSIDE the stem,
+//     past offset 0).
+//  3. After the generator completes, verify that the on-disk blob
+//     contains ALL offsets (0, 1, and everything else the trie has)
+//     — not just the offsets from the resumed walk.
+//
+// Before A3: step 3 would show only the post-marker offsets.
+func TestBintrieGeneratorResumeMidStem(t *testing.T) {
+	db := rawdb.NewMemoryDatabase()
+	root, accounts := buildTestBintrie(t, db)
+
+	// Pick addr1 (the one with storage). It has BasicData (offset 0),
+	// CodeHash (offset 1), and storage slot 7 at offset 64+7=71.
+	a := accounts[0]
+	stem := bintrie.GetBinaryTreeKeyBasicData(a.addr)[:bintrie.StemSize]
+
+	// Step 1: Pre-seed the disk with a partial stem blob containing
+	// only offsets 0 and 1 — as if a prior generator pass wrote them
+	// before being interrupted.
+	preSeed := newStemBuilder()
+	preSeed.set(bintrie.BasicDataLeafKey, bytes.Repeat([]byte{0xAA}, 32))
+	preSeed.set(bintrie.CodeHashLeafKey, bytes.Repeat([]byte{0xBB}, 32))
+	rawdb.WriteBinTrieStem(db, stem, preSeed.encode())
+
+	// Step 2: Resume from offset 1 — the generator should pick up at
+	// offset 1 of this stem and walk forward. The builder will
+	// accumulate only offset 1 + storage offset from the trie walk.
+	// The RMW in flushStem must merge them with the pre-seeded disk
+	// blob to preserve offset 0.
+	marker := make([]byte, 32)
+	copy(marker[:bintrie.StemSize], stem)
+	marker[bintrie.StemSize] = bintrie.CodeHashLeafKey // resume at offset 1
+
+	runTestBintrieGenerator(t, db, root, marker)
+
+	// Step 3: After the full run, verify the disk blob has ALL offsets.
+	blob := rawdb.ReadBinTrieStem(db, stem)
+	if len(blob) == 0 {
+		t.Fatal("stem blob missing after mid-stem resume")
+	}
+
+	// Offset 0 (BasicData): must survive the mid-stem resume because
+	// the RMW merged the builder's new content with the existing disk
+	// blob. Before A3, this offset was silently dropped.
+	basic, err := extractStemOffset(blob, bintrie.BasicDataLeafKey)
+	if err != nil {
+		t.Fatalf("extract BasicData: %v", err)
+	}
+	if len(basic) != 32 {
+		t.Fatalf("BasicData lost after mid-stem resume (A3 regression): got len=%d, want 32", len(basic))
+	}
+
+	// Offset 1 (CodeHash): the generator walked this offset (it's at
+	// the marker), so the trie's authoritative value should overwrite
+	// the pre-seeded one.
+	code, err := extractStemOffset(blob, bintrie.CodeHashLeafKey)
+	if err != nil {
+		t.Fatalf("extract CodeHash: %v", err)
+	}
+	if len(code) != 32 {
+		t.Fatalf("CodeHash missing after resume: got len=%d", len(code))
+	}
+
+	// Storage slot must also be present (the generator walked it as
+	// part of the full stem traversal).
+	storageKey := bintrie.GetBinaryTreeKeyStorageSlot(a.addr, a.slot[:])
+	storageOffset := storageKey[bintrie.StemSize]
+	storageStem := storageKey[:bintrie.StemSize]
+	if bytes.Equal(storageStem, stem) {
+		// Storage is at the same stem (header slot) — verify it's in the blob.
+		storageVal, err := extractStemOffset(blob, storageOffset)
+		if err != nil {
+			t.Fatalf("extract storage: %v", err)
+		}
+		if !bytes.Equal(storageVal, a.slotVal) {
+			t.Errorf("storage value: got %x, want %x", storageVal, a.slotVal)
+		}
+	}
+}
--- a/triedb/pathdb/stem_blob.go
+++ b/triedb/pathdb/stem_blob.go
@ -267,6 +267,29 @@ func (b *stemBuilder) reset() {
 	b.values = [stemBlobBitmapBits][]byte{}
 }

+// toOffsetValues converts the builder's populated entries into a slice
+// of (offset, value) pairs suitable for passing to mergeStemBlob. Only
+// offsets with non-nil values are emitted; cleared (nil-value) offsets
+// are skipped since their absence in the merge input leaves the
+// existing blob's value intact — which is the correct behavior for the
+// generator's RMW pattern where the builder holds only the new writes.
+func (b *stemBuilder) toOffsetValues() []stemOffsetValue {
+	count := bitmapPopcount(b.bitmap)
+	if count == 0 {
+		return nil
+	}
+	out := make([]stemOffsetValue, 0, count)
+	for offset := range stemBlobBitmapBits {
+		if b.values[offset] != nil {
+			out = append(out, stemOffsetValue{
+				Offset: byte(offset),
+				Value:  b.values[offset],
+			})
+		}
+	}
+	return out
+}
+
 // stemOffsetValue is a single (offset, value) pair passed to mergeStemBlob.
 // A nil Value clears the offset.
 type stemOffsetValue struct {