From 1051e7be0c7a962f07b4d2aae5673564d332753a Mon Sep 17 00:00:00 2001 From: weiihann Date: Thu, 12 Feb 2026 22:23:57 +0800 Subject: [PATCH] nomt/merkle, nomt/db: adapt to EIP-7864 stem-based types (Phase D) Remove leaf compaction from PageWalker.compactStep, replace KeyValue with StemKeyValue throughout the merkle engine and worker, and update DB.Update to accept pre-hashed stem key-value pairs. Key change: singleThreadedUpdate now uses the same depth-7 child-index partitioning as the parallel path, ensuring identical intermediate hashes without leaf compaction. Co-Authored-By: Claude Opus 4.6 --- nomt/db/db.go | 32 ++-- nomt/db/db_test.go | 72 ++++---- nomt/merkle/pagewalker.go | 49 ++---- nomt/merkle/pagewalker_test.go | 290 +++++++++++++-------------------- nomt/merkle/worker.go | 65 ++++---- nomt/merkle/worker_test.go | 193 ++++++++++++++-------- 6 files changed, 336 insertions(+), 365 deletions(-) diff --git a/nomt/db/db.go b/nomt/db/db.go index 4ead07b20f..cd805ecdae 100644 --- a/nomt/db/db.go +++ b/nomt/db/db.go @@ -127,15 +127,14 @@ func (db *DB) SyncSeqn() uint32 { return db.syncSeqn } -// Update applies a batch of leaf operations to the trie. +// Update applies a batch of stem key-value pairs to the trie. // -// Operations are sorted by key internally. The function: +// The pairs are sorted by stem path internally (in place). The function: // 1. Builds a PageSet from Bitbox -// 2. Groups operations by their terminal node position -// 3. Runs the PageWalker to produce updated pages -// 4. Persists updated pages via Bitbox sync -// 5. Returns the new root hash -func (db *DB) Update(ops []core.LeafOp) (core.Node, error) { +// 2. Runs the parallel PageWalker to produce updated pages +// 3. Persists updated pages via Bitbox sync +// 4. 
Returns the new root hash +func (db *DB) Update(ops []core.StemKeyValue) (core.Node, error) { if len(ops) == 0 { return db.Root(), nil } @@ -143,26 +142,15 @@ func (db *DB) Update(ops []core.LeafOp) (core.Node, error) { db.mu.Lock() defer db.mu.Unlock() - // Sort ops by key path. + // Sort by stem path. sort.Slice(ops, func(i, j int) bool { - return ops[i].Key != ops[j].Key && keyLess(&ops[i].Key, &ops[j].Key) + return stemLess(&ops[i].Stem, &ops[j].Stem) }) - // Convert to KeyValue (filter out deletes). - kvs := make([]core.KeyValue, 0, len(ops)) - for _, op := range ops { - if op.Value != nil { - kvs = append(kvs, core.KeyValue{Key: op.Key, Value: *op.Value}) - } - } - if len(kvs) == 0 { - return db.root, nil - } - pageSetFactory := func() merkle.PageSet { return newBitboxPageSet(db.bb) } - out := merkle.ParallelUpdate(db.root, kvs, db.numWorkers, pageSetFactory) + out := merkle.ParallelUpdate(db.root, ops, db.numWorkers, pageSetFactory) // Persist updated pages. walPath := filepath.Join(db.dataDir, walFileName) @@ -260,7 +248,7 @@ func pageIDKey(id core.PageID) string { return string(encoded[:]) } -func keyLess(a, b *core.KeyPath) bool { +func stemLess(a, b *core.StemPath) bool { for i := range a { if a[i] < b[i] { return true diff --git a/nomt/db/db_test.go b/nomt/db/db_test.go index 0141e12674..ba00761eb3 100644 --- a/nomt/db/db_test.go +++ b/nomt/db/db_test.go @@ -31,9 +31,8 @@ func TestReopenPreservesState(t *testing.T) { db, err := Open(dir, DefaultConfig()) require.NoError(t, err) - v := core.ValueHash{0x01} - newRoot, err := db.Update([]core.LeafOp{ - {Key: makeKey(0x10), Value: &v}, + newRoot, err := db.Update([]core.StemKeyValue{ + {Stem: makeStem(0x10), Hash: makeHash(0x01)}, }) require.NoError(t, err) require.False(t, core.IsTerminator(&newRoot)) @@ -55,15 +54,12 @@ func TestUpdateSingleKey(t *testing.T) { require.NoError(t, err) defer db.Close() - v := core.ValueHash{0x42} - kp := makeKey(0x10) // starts with 0 bit - - newRoot, err := 
db.Update([]core.LeafOp{ - {Key: kp, Value: &v}, + newRoot, err := db.Update([]core.StemKeyValue{ + {Stem: makeStem(0x10), Hash: makeHash(0x42)}, }) require.NoError(t, err) - assert.True(t, core.IsLeaf(&newRoot)) + assert.False(t, core.IsTerminator(&newRoot)) assert.Equal(t, newRoot, db.Root()) } @@ -73,22 +69,20 @@ func TestUpdateMultipleKeys(t *testing.T) { require.NoError(t, err) defer db.Close() - v := core.ValueHash{0x01} - ops := []core.LeafOp{ - {Key: makeKey(0x10), Value: &v}, - {Key: makeKey(0x80), Value: &v}, + ops := []core.StemKeyValue{ + {Stem: makeStem(0x10), Hash: makeHash(0x01)}, + {Stem: makeStem(0x80), Hash: makeHash(0x02)}, } newRoot, err := db.Update(ops) require.NoError(t, err) - assert.True(t, core.IsInternal(&newRoot)) + assert.False(t, core.IsTerminator(&newRoot)) } func TestUpdateDeterministic(t *testing.T) { - v := core.ValueHash{0x01} - ops := []core.LeafOp{ - {Key: makeKey(0x10), Value: &v}, - {Key: makeKey(0x80), Value: &v}, + ops := []core.StemKeyValue{ + {Stem: makeStem(0x10), Hash: makeHash(0x01)}, + {Stem: makeStem(0x80), Hash: makeHash(0x02)}, } run := func() core.Node { @@ -118,22 +112,21 @@ func TestUpdateEmptyOps(t *testing.T) { assert.Equal(t, core.Terminator, root) } -func TestUpdateSortsByKey(t *testing.T) { +func TestUpdateSortsByStem(t *testing.T) { dir := t.TempDir() db, err := Open(dir, DefaultConfig()) require.NoError(t, err) defer db.Close() - v := core.ValueHash{0x01} - // Provide keys in reverse order — should still work. - ops := []core.LeafOp{ - {Key: makeKey(0x80), Value: &v}, - {Key: makeKey(0x10), Value: &v}, + // Provide stems in reverse order — should still work. 
+ ops := []core.StemKeyValue{ + {Stem: makeStem(0x80), Hash: makeHash(0x01)}, + {Stem: makeStem(0x10), Hash: makeHash(0x02)}, } root, err := db.Update(ops) require.NoError(t, err) - assert.True(t, core.IsInternal(&root)) + assert.False(t, core.IsTerminator(&root)) } func TestSyncSeqnIncrements(t *testing.T) { @@ -144,24 +137,33 @@ func TestSyncSeqnIncrements(t *testing.T) { assert.Equal(t, uint32(0), db.SyncSeqn()) - v := core.ValueHash{0x01} - _, err = db.Update([]core.LeafOp{ - {Key: makeKey(0x10), Value: &v}, + _, err = db.Update([]core.StemKeyValue{ + {Stem: makeStem(0x10), Hash: makeHash(0x01)}, }) require.NoError(t, err) assert.Equal(t, uint32(1), db.SyncSeqn()) - _, err = db.Update([]core.LeafOp{ - {Key: makeKey(0x80), Value: &v}, + _, err = db.Update([]core.StemKeyValue{ + {Stem: makeStem(0x80), Hash: makeHash(0x02)}, }) require.NoError(t, err) assert.Equal(t, uint32(2), db.SyncSeqn()) } -func makeKey(b byte) core.KeyPath { - var kp core.KeyPath - for i := range kp { - kp[i] = b +func makeStem(b byte) core.StemPath { + var sp core.StemPath + for i := range sp { + sp[i] = b } - return kp + return sp +} + +func makeHash(b byte) core.Node { + var h core.Node + for i := range h { + h[i] = b ^ byte(i) + } + // Ensure non-zero to avoid terminator. + h[0] |= 0x01 + return h } diff --git a/nomt/merkle/pagewalker.go b/nomt/merkle/pagewalker.go index 7f10548744..0e9c9596f7 100644 --- a/nomt/merkle/pagewalker.go +++ b/nomt/merkle/pagewalker.go @@ -76,7 +76,7 @@ func NewPageWalker(root core.Node, parentPage *core.PageID) *PageWalker { } // AdvanceAndReplace advances to the given position and replaces the terminal -// node there with a sub-trie built from the provided key-value pairs. +// node there with a sub-trie built from the provided stem key-value pairs. // // The pairs must be sorted and must all share the prefix corresponding to // the position. An empty slice deletes the existing terminal node. 
@@ -85,7 +85,7 @@ func NewPageWalker(root core.Node, parentPage *core.PageID) *PageWalker { func (w *PageWalker) AdvanceAndReplace( pageSet PageSet, newPos core.TriePosition, - ops []core.KeyValue, + ops []core.StemKeyValue, ) { if w.lastPosition != nil { w.assertForward(&newPos) @@ -152,7 +152,7 @@ func (w *PageWalker) placeNode(node core.Node) { } } -func (w *PageWalker) replaceTerminal(pageSet PageSet, ops []core.KeyValue) { +func (w *PageWalker) replaceTerminal(pageSet PageSet, ops []core.StemKeyValue) { var existingNode core.Node if w.position.IsRoot() { existingNode = w.root @@ -163,7 +163,7 @@ func (w *PageWalker) replaceTerminal(pageSet PageSet, ops []core.KeyValue) { startDepth := w.position.Depth() - core.BuildTrie(int(w.position.Depth()), ops, func(wn core.WriteNode) { + core.BuildInternalTree(int(w.position.Depth()), ops, func(wn core.WriteNode) { node := wn.Node // For internal nodes, clear garbage in the sibling slot if the @@ -443,41 +443,24 @@ func (w *PageWalker) compactUpToRoot() { } // compactStep performs one layer of compaction: reads the current node and -// its sibling, then either compacts terminators/leaves upward or hashes -// an internal node. +// its sibling, then hashes them as an internal node. In EIP-7864, there is +// no leaf compaction — stem hashes are opaque values that never float up. func (w *PageWalker) compactStep() core.Node { node := w.node() sibling := w.siblingNode() - bit := w.position.PeekLastBit() - nodeKind := core.NodeKindOf(&node) - sibKind := core.NodeKindOf(&sibling) - - switch { - case nodeKind == core.NodeTerminator && sibKind == core.NodeTerminator: + if core.IsTerminator(&node) && core.IsTerminator(&sibling) { return core.Terminator - - case nodeKind == core.NodeLeaf && sibKind == core.NodeTerminator: - // Compact: clear this node, move leaf up. - w.setNode(core.Terminator) - return node - - case nodeKind == core.NodeTerminator && sibKind == core.NodeLeaf: - // Compact: clear sibling, move leaf up. 
- w.position.Sibling() - w.setNode(core.Terminator) - return sibling - - default: - // Internal: hash the two children together. - var id core.InternalData - if bit { - id = core.InternalData{Left: sibling, Right: node} - } else { - id = core.InternalData{Left: node, Right: sibling} - } - return core.HashInternal(&id) } + + bit := w.position.PeekLastBit() + var id core.InternalData + if bit { + id = core.InternalData{Left: sibling, Right: node} + } else { + id = core.InternalData{Left: node, Right: sibling} + } + return core.HashInternal(&id) } // --- page node access --- diff --git a/nomt/merkle/pagewalker_test.go b/nomt/merkle/pagewalker_test.go index 3ba0b53efd..c031414452 100644 --- a/nomt/merkle/pagewalker_test.go +++ b/nomt/merkle/pagewalker_test.go @@ -17,30 +17,47 @@ func triePos(bits ...bool) core.TriePosition { return p } -// Helper: create a KeyPath with the given bits set at the MSB positions. -func keyPath(bits ...bool) core.KeyPath { - var kp core.KeyPath - for i, b := range bits { - if b { - kp[i/8] |= 1 << (7 - i%8) +// makeStemPath creates a StemPath filled with a single byte. +func makeStemPath(b byte) core.StemPath { + var sp core.StemPath + for i := range sp { + sp[i] = b + } + return sp +} + +// makeSKV creates a StemKeyValue where the stem is filled with b +// and the hash is a deterministic non-zero value derived from b. +func makeSKV(b byte) core.StemKeyValue { + stem := makeStemPath(b) + var hash core.Node + for i := range hash { + hash[i] = b ^ byte(i) + } + return core.StemKeyValue{Stem: stem, Hash: hash} +} + +// expectedRoot computes the expected root hash for a set of stem key-values +// using BuildInternalTree as the oracle. It splits at bit 0 (left/right), +// builds each half at depth 1, and hashes the two halves into the root. 
+func expectedRoot(skvs []core.StemKeyValue) core.Node { + if len(skvs) == 0 { + return core.Terminator + } + var left, right []core.StemKeyValue + for _, skv := range skvs { + if skv.Stem[0]&0x80 == 0 { + left = append(left, skv) + } else { + right = append(right, skv) } } - return kp -} - -// Helper: create a ValueHash filled with a single byte. -func val(v byte) core.ValueHash { - var vh core.ValueHash - for i := range vh { - vh[i] = v + leftHash := core.BuildInternalTree(1, left, func(_ core.WriteNode) {}) + rightHash := core.BuildInternalTree(1, right, func(_ core.WriteNode) {}) + if core.IsTerminator(&leftHash) && core.IsTerminator(&rightHash) { + return core.Terminator } - return vh -} - -// Helper: compute the expected root from a set of key-value pairs using -// BuildTrie directly (the "oracle"). -func expectedRoot(kvs []core.KeyValue) core.Node { - return core.BuildTrie(0, kvs, func(_ core.WriteNode) {}) + return core.HashInternal(&core.InternalData{Left: leftHash, Right: rightHash}) } func TestPageWalkerEmptyTrie(t *testing.T) { @@ -56,40 +73,31 @@ func TestPageWalkerSingleInsert(t *testing.T) { ps := NewMemoryPageSet(true) walker := NewPageWalker(core.Terminator, nil) - kp := keyPath(false, false) - v := val(1) - pos := triePos(false, false) + skv := makeSKV(0x00) // stem 0x00..., first bit = 0 + pos := triePos(false) - walker.AdvanceAndReplace(ps, pos, []core.KeyValue{{Key: kp, Value: v}}) + walker.AdvanceAndReplace(ps, pos, []core.StemKeyValue{skv}) out := walker.Conclude() - expected := expectedRoot([]core.KeyValue{{Key: kp, Value: v}}) + expected := expectedRoot([]core.StemKeyValue{skv}) assert.Equal(t, expected, out.Root) - assert.True(t, core.IsLeaf(&out.Root)) + assert.False(t, core.IsTerminator(&out.Root)) } func TestPageWalkerTwoInsertsSameAdvance(t *testing.T) { - // Two keys that share a common prefix at position [0,0], then diverge. + // Two stems that share a common prefix, then diverge. 
ps := NewMemoryPageSet(true) walker := NewPageWalker(core.Terminator, nil) - kp1 := keyPath(false, false, true, false) // 0010... - kp2 := keyPath(false, false, true, true) // 0011... - v1, v2 := val(1), val(2) + skv1 := makeSKV(0x00) // 0000 0000... + skv2 := makeSKV(0x01) // 0000 0001... (differ at bit 7) - pos := triePos(false, false) - walker.AdvanceAndReplace(ps, pos, []core.KeyValue{ - {Key: kp1, Value: v1}, - {Key: kp2, Value: v2}, - }) + pos := triePos(false) // advance to left subtree + walker.AdvanceAndReplace(ps, pos, []core.StemKeyValue{skv1, skv2}) out := walker.Conclude() - expected := expectedRoot([]core.KeyValue{ - {Key: kp1, Value: v1}, - {Key: kp2, Value: v2}, - }) + expected := expectedRoot([]core.StemKeyValue{skv1, skv2}) assert.Equal(t, expected, out.Root) - assert.True(t, core.IsInternal(&out.Root)) } func TestPageWalkerTwoAdvances(t *testing.T) { @@ -97,71 +105,55 @@ func TestPageWalkerTwoAdvances(t *testing.T) { ps := NewMemoryPageSet(true) walker := NewPageWalker(core.Terminator, nil) - kp0 := keyPath(false, false) // 00... - kp1 := keyPath(true, false) // 10... 
- v0, v1 := val(1), val(2) + skv0 := makeSKV(0x00) // first bit 0 + skv1 := makeSKV(0x80) // first bit 1 - walker.AdvanceAndReplace(ps, triePos(false), []core.KeyValue{ - {Key: kp0, Value: v0}, - }) - walker.AdvanceAndReplace(ps, triePos(true), []core.KeyValue{ - {Key: kp1, Value: v1}, - }) + walker.AdvanceAndReplace(ps, triePos(false), []core.StemKeyValue{skv0}) + walker.AdvanceAndReplace(ps, triePos(true), []core.StemKeyValue{skv1}) out := walker.Conclude() - expected := expectedRoot([]core.KeyValue{ - {Key: kp0, Value: v0}, - {Key: kp1, Value: v1}, - }) + expected := expectedRoot([]core.StemKeyValue{skv0, skv1}) assert.Equal(t, expected, out.Root) } func TestPageWalkerMultipleAdvances(t *testing.T) { - // Match the Rust multi_value test pattern: - // 0b00010000 = 0x10, 0b00100000 = 0x20, 0b01000000 = 0x40, - // 0b10100000 = 0xA0, 0b10110000 = 0xB0 + // Match the multi_value test pattern. ps := NewMemoryPageSet(true) walker := NewPageWalker(core.Terminator, nil) - kvA := core.KeyValue{Key: makeKVKey(0x10), Value: makeKVVal(0x10)} - kvB := core.KeyValue{Key: makeKVKey(0x20), Value: makeKVVal(0x20)} - kvC := core.KeyValue{Key: makeKVKey(0x40), Value: makeKVVal(0x40)} - kvD := core.KeyValue{Key: makeKVKey(0xA0), Value: makeKVVal(0xA0)} - kvE := core.KeyValue{Key: makeKVKey(0xB0), Value: makeKVVal(0xB0)} + skvA := makeSKV(0x10) // 0001... + skvB := makeSKV(0x20) // 0010... + skvC := makeSKV(0x40) // 0100... + skvD := makeSKV(0xA0) // 1010... + skvE := makeSKV(0xB0) // 1011... - allOps := []core.KeyValue{kvA, kvB, kvC, kvD, kvE} + allOps := []core.StemKeyValue{skvA, skvB, skvC, skvD, skvE} expected := expectedRoot(allOps) // Group by terminal: A and B share prefix 00, C is at 01, D and E // share prefix 101. 
- // Terminal at [0,0]: A, B walker.AdvanceAndReplace(ps, triePos(false, false), - []core.KeyValue{kvA, kvB}) - // Terminal at [0,1]: C + []core.StemKeyValue{skvA, skvB}) walker.AdvanceAndReplace(ps, triePos(false, true), - []core.KeyValue{kvC}) - // Terminal at [1]: D, E + []core.StemKeyValue{skvC}) walker.AdvanceAndReplace(ps, triePos(true), - []core.KeyValue{kvD, kvE}) + []core.StemKeyValue{skvD, skvE}) out := walker.Conclude() assert.Equal(t, expected, out.Root) } func TestPageWalkerDeleteToTerminator(t *testing.T) { - // Insert a leaf, then delete it in a second walker pass. + // Insert a stem, then delete it in a second walker pass. ps := NewMemoryPageSet(true) - kp := keyPath(false) - v := val(1) + skv := makeSKV(0x00) // left child - // First: insert a leaf. + // First: insert a stem. walker1 := NewPageWalker(core.Terminator, nil) - walker1.AdvanceAndReplace(ps, triePos(false), []core.KeyValue{ - {Key: kp, Value: v}, - }) + walker1.AdvanceAndReplace(ps, triePos(false), []core.StemKeyValue{skv}) out1 := walker1.Conclude() - require.True(t, core.IsLeaf(&out1.Root)) + require.False(t, core.IsTerminator(&out1.Root)) ps.Apply(out1.Pages) // Second: delete it (empty ops = terminator replacement). @@ -171,45 +163,37 @@ func TestPageWalkerDeleteToTerminator(t *testing.T) { assert.Equal(t, core.Terminator, out2.Root) } -func TestPageWalkerCompactionLeafUp(t *testing.T) { - // When one sibling becomes a terminator and the other is a leaf, - // the leaf should be compacted upward. +func TestPageWalkerNoLeafCompaction(t *testing.T) { + // In EIP-7864, when one sibling becomes a terminator and the other is + // a stem hash, they form an internal node (no compaction upward). ps := NewMemoryPageSet(true) - kp0 := keyPath(false) - kp1 := keyPath(true) - v := val(1) + skv0 := makeSKV(0x00) // left + skv1 := makeSKV(0x80) // right - // Insert two leaves. + // Insert two stems. 
walker1 := NewPageWalker(core.Terminator, nil) - walker1.AdvanceAndReplace(ps, triePos(false), []core.KeyValue{ - {Key: kp0, Value: v}, - }) - walker1.AdvanceAndReplace(ps, triePos(true), []core.KeyValue{ - {Key: kp1, Value: v}, - }) + walker1.AdvanceAndReplace(ps, triePos(false), []core.StemKeyValue{skv0}) + walker1.AdvanceAndReplace(ps, triePos(true), []core.StemKeyValue{skv1}) out1 := walker1.Conclude() ps.Apply(out1.Pages) - // Delete the right leaf — left leaf should compact up to root. + // Delete the right stem — root should be HashInternal(stemHash, Terminator), + // NOT just stemHash (no compaction). walker2 := NewPageWalker(out1.Root, nil) walker2.AdvanceAndReplace(ps, triePos(true), nil) out2 := walker2.Conclude() - expectedLeaf := core.HashLeaf(&core.LeafData{KeyPath: kp0, ValueHash: v}) - assert.Equal(t, expectedLeaf, out2.Root) + expected := core.HashInternal(&core.InternalData{Left: skv0.Hash, Right: core.Terminator}) + assert.Equal(t, expected, out2.Root) } func TestPageWalkerOutputPages(t *testing.T) { ps := NewMemoryPageSet(true) walker := NewPageWalker(core.Terminator, nil) - kp := keyPath(false) - v := val(1) - - walker.AdvanceAndReplace(ps, triePos(false), []core.KeyValue{ - {Key: kp, Value: v}, - }) + skv := makeSKV(0x00) + walker.AdvanceAndReplace(ps, triePos(false), []core.StemKeyValue{skv}) out := walker.Conclude() // Should output at least the root page. 
@@ -221,31 +205,25 @@ func TestPageWalkerAdvanceBackwardsPanics(t *testing.T) { ps := NewMemoryPageSet(true) walker := NewPageWalker(core.Terminator, nil) - walker.AdvanceAndReplace(ps, triePos(true), []core.KeyValue{ - {Key: keyPath(true), Value: val(1)}, - }) + walker.AdvanceAndReplace(ps, triePos(true), []core.StemKeyValue{makeSKV(0x80)}) assert.Panics(t, func() { - walker.AdvanceAndReplace(ps, triePos(false), []core.KeyValue{ - {Key: keyPath(false), Value: val(2)}, - }) + walker.AdvanceAndReplace(ps, triePos(false), []core.StemKeyValue{makeSKV(0x00)}) }) } func TestPageWalkerDeterministic(t *testing.T) { - // Same inputs should produce the same root. - kvs := []core.KeyValue{ - {Key: makeKVKey(0x10), Value: makeKVVal(0x10)}, - {Key: makeKVKey(0x50), Value: makeKVVal(0x50)}, - {Key: makeKVKey(0xA0), Value: makeKVVal(0xA0)}, + skvs := []core.StemKeyValue{ + makeSKV(0x10), + makeSKV(0x50), + makeSKV(0xA0), } run := func() core.Node { ps := NewMemoryPageSet(true) w := NewPageWalker(core.Terminator, nil) - // All at root terminal since trie is empty. - w.AdvanceAndReplace(ps, triePos(false), kvs[:2]) - w.AdvanceAndReplace(ps, triePos(true), kvs[2:]) + w.AdvanceAndReplace(ps, triePos(false), skvs[:2]) + w.AdvanceAndReplace(ps, triePos(true), skvs[2:]) return w.Conclude().Root } @@ -259,102 +237,60 @@ func TestPageWalkerIncrementalUpdates(t *testing.T) { // building from scratch. ps := NewMemoryPageSet(true) - kp0 := makeKVKey(0x10) - kp1 := makeKVKey(0x80) - v1, v2 := makeKVVal(0x01), makeKVVal(0x02) + skv0 := makeSKV(0x10) // left + skv1 := makeSKV(0x80) // right + skv1Updated := core.StemKeyValue{ + Stem: skv1.Stem, + Hash: core.Node{0xFF, 0xFE, 0xFD}, // different hash + } - // Pass 1: insert two keys. + // Pass 1: insert two stems. 
w1 := NewPageWalker(core.Terminator, nil) - w1.AdvanceAndReplace(ps, triePos(false), []core.KeyValue{ - {Key: kp0, Value: v1}, - }) - w1.AdvanceAndReplace(ps, triePos(true), []core.KeyValue{ - {Key: kp1, Value: v1}, - }) + w1.AdvanceAndReplace(ps, triePos(false), []core.StemKeyValue{skv0}) + w1.AdvanceAndReplace(ps, triePos(true), []core.StemKeyValue{skv1}) out1 := w1.Conclude() ps.Apply(out1.Pages) - // Pass 2: update the second key's value. + // Pass 2: update the second stem's hash. w2 := NewPageWalker(out1.Root, nil) - w2.AdvanceAndReplace(ps, triePos(true), []core.KeyValue{ - {Key: kp1, Value: v2}, - }) + w2.AdvanceAndReplace(ps, triePos(true), []core.StemKeyValue{skv1Updated}) out2 := w2.Conclude() - // The expected root should match building the whole trie from scratch - // with the updated value. - expected := expectedRoot([]core.KeyValue{ - {Key: kp0, Value: v1}, - {Key: kp1, Value: v2}, - }) + // The expected root should match building from scratch with updated hash. + expected := expectedRoot([]core.StemKeyValue{skv0, skv1Updated}) assert.Equal(t, expected, out2.Root) } func TestPageWalkerAdvanceWithoutModify(t *testing.T) { - // Test the Advance (read-only) method. ps := NewMemoryPageSet(true) walker := NewPageWalker(core.Terminator, nil) - kp0 := keyPath(false, false, true, false) // 0010... - kp1 := keyPath(false, false, true, true) // 0011... - kp2 := keyPath(true) // 1... - v := val(1) + skvA := makeSKV(0x00) // 0000... + skvB := makeSKV(0x01) // 0000 0001... (share prefix with A) + skvC := makeSKV(0x80) // 1000... - walker.AdvanceAndReplace(ps, triePos(false, false), []core.KeyValue{ - {Key: kp0, Value: v}, - {Key: kp1, Value: v}, - }) + walker.AdvanceAndReplace(ps, triePos(false), []core.StemKeyValue{skvA, skvB}) - // Advance past [0,1] without modifying — the walker should still - // compact correctly. + // Advance past [0,1] without modifying. 
walker.Advance(triePos(false, true)) - walker.AdvanceAndReplace(ps, triePos(true), []core.KeyValue{ - {Key: kp2, Value: v}, - }) + walker.AdvanceAndReplace(ps, triePos(true), []core.StemKeyValue{skvC}) out := walker.Conclude() - expected := expectedRoot([]core.KeyValue{ - {Key: kp0, Value: v}, - {Key: kp1, Value: v}, - {Key: kp2, Value: v}, - }) + expected := expectedRoot([]core.StemKeyValue{skvA, skvB, skvC}) assert.Equal(t, expected, out.Root) } func TestPageWalkerPageDiffs(t *testing.T) { - // Verify that output pages have non-empty diffs. ps := NewMemoryPageSet(true) walker := NewPageWalker(core.Terminator, nil) - kp := keyPath(false, false) - v := val(1) - walker.AdvanceAndReplace(ps, triePos(false, false), []core.KeyValue{ - {Key: kp, Value: v}, - }) + skv := makeSKV(0x00) + walker.AdvanceAndReplace(ps, triePos(false), []core.StemKeyValue{skv}) out := walker.Conclude() require.NotEmpty(t, out.Pages) - // The root page should have at least one changed node. assert.True(t, out.Pages[0].Diff.Count() > 0, "page diff should track changed nodes") } - -// --- helpers --- - -func makeKVKey(b byte) core.KeyPath { - var kp core.KeyPath - for i := range kp { - kp[i] = b - } - return kp -} - -func makeKVVal(b byte) core.ValueHash { - var vh core.ValueHash - for i := range vh { - vh[i] = b - } - return vh -} diff --git a/nomt/merkle/worker.go b/nomt/merkle/worker.go index 33a30841d0..93f914cf25 100644 --- a/nomt/merkle/worker.go +++ b/nomt/merkle/worker.go @@ -8,10 +8,10 @@ import ( "github.com/ethereum/go-ethereum/nomt/core" ) -// childBucket groups key-value operations for a single root page child index. +// childBucket groups stem key-value operations for a single root page child index. type childBucket struct { childIndex uint8 - kvs []core.KeyValue + kvs []core.StemKeyValue } // workerTask describes the work assigned to a single worker goroutine. 
@@ -26,15 +26,15 @@ type workerResult struct { err error } -// ParallelUpdate applies sorted key-value operations to the trie using +// ParallelUpdate applies sorted stem key-value operations to the trie using // multiple worker goroutines. Each worker processes a disjoint set of root -// page child subtrees (partitioned by the first 6 bits of each key path). +// page child subtrees (partitioned by the first 6 bits of each stem path). // // If numWorkers <= 1 or the batch is small, falls back to single-threaded. // The pageSetFactory is called once per worker to create independent PageSets. func ParallelUpdate( root core.Node, - kvs []core.KeyValue, + kvs []core.StemKeyValue, numWorkers int, pageSetFactory func() PageSet, ) Output { @@ -115,42 +115,49 @@ func ParallelUpdate( // singleThreadedUpdate runs the trie update with a single PageWalker. // This is the fallback for small batches or single-worker configurations. +// +// Uses the same child-index partitioning as the parallel path to ensure +// identical hash results. Without leaf compaction, splitting at depth 1 +// vs depth 7 produces different intermediate hashes, so both paths must +// use the same splitting strategy. 
func singleThreadedUpdate( root core.Node, - kvs []core.KeyValue, + kvs []core.StemKeyValue, pageSet PageSet, ) Output { walker := NewPageWalker(root, nil) - var leftKVs, rightKVs []core.KeyValue - for i := range kvs { - if kvs[i].Key[0]&0x80 == 0 { - leftKVs = append(leftKVs, kvs[i]) - } else { - rightKVs = append(rightKVs, kvs[i]) + buckets := partitionByChildIndex(kvs) + for childIdx, childKVs := range buckets { + if len(childKVs) == 0 { + continue + } + var leftKVs, rightKVs []core.StemKeyValue + for i := range childKVs { + if (childKVs[i].Stem[0]>>1)&1 == 0 { + leftKVs = append(leftKVs, childKVs[i]) + } else { + rightKVs = append(rightKVs, childKVs[i]) + } } - } - if len(leftKVs) > 0 { - leftPos := core.NewTriePosition() - leftPos.Down(false) - walker.AdvanceAndReplace(pageSet, leftPos, leftKVs) - } - if len(rightKVs) > 0 { - rightPos := core.NewTriePosition() - rightPos.Down(true) - walker.AdvanceAndReplace(pageSet, rightPos, rightKVs) + if len(leftKVs) > 0 { + walker.AdvanceAndReplace(pageSet, childPosition(uint8(childIdx), false), leftKVs) + } + if len(rightKVs) > 0 { + walker.AdvanceAndReplace(pageSet, childPosition(uint8(childIdx), true), rightKVs) + } } return walker.Conclude() } -// partitionByChildIndex buckets sorted KVs by the first 6 bits of each key +// partitionByChildIndex buckets sorted SKVs by the first 6 bits of each stem // path (the root page's child index: 0-63). -func partitionByChildIndex(kvs []core.KeyValue) [64][]core.KeyValue { - var buckets [64][]core.KeyValue +func partitionByChildIndex(kvs []core.StemKeyValue) [64][]core.StemKeyValue { + var buckets [64][]core.StemKeyValue for i := range kvs { - childIdx := kvs[i].Key[0] >> 2 + childIdx := kvs[i].Stem[0] >> 2 buckets[childIdx] = append(buckets[childIdx], kvs[i]) } return buckets @@ -159,7 +166,7 @@ func partitionByChildIndex(kvs []core.KeyValue) [64][]core.KeyValue { // assignToWorkers distributes non-empty child buckets across numWorkers // contiguous ranges. 
func assignToWorkers( - buckets [64][]core.KeyValue, + buckets [64][]core.StemKeyValue, numWorkers int, ) []workerTask { var nonEmpty []childBucket @@ -205,9 +212,9 @@ func runWorker( walker := NewPageWalker(root, &rootPageID) for _, child := range task.children { - var leftKVs, rightKVs []core.KeyValue + var leftKVs, rightKVs []core.StemKeyValue for i := range child.kvs { - if (child.kvs[i].Key[0]>>1)&1 == 0 { + if (child.kvs[i].Stem[0]>>1)&1 == 0 { leftKVs = append(leftKVs, child.kvs[i]) } else { rightKVs = append(rightKVs, child.kvs[i]) diff --git a/nomt/merkle/worker_test.go b/nomt/merkle/worker_test.go index 20e7adfb27..f867489e4c 100644 --- a/nomt/merkle/worker_test.go +++ b/nomt/merkle/worker_test.go @@ -13,13 +13,13 @@ import ( // --- Unit tests for helpers --- func TestPartitionByChildIndex(t *testing.T) { - // Key 0x00... → child 0, key 0xFC... → child 63. - kvs := []core.KeyValue{ - {Key: makeKVKey(0x00), Value: makeKVVal(1)}, - {Key: makeKVKey(0x04), Value: makeKVVal(2)}, // 0x04 >> 2 = 1 - {Key: makeKVKey(0xFC), Value: makeKVVal(3)}, // 0xFC >> 2 = 63 + // Stem 0x00... → child 0, stem 0x04... → child 1, stem 0xFC... → child 63. + skvs := []core.StemKeyValue{ + makeSKV(0x00), + makeSKV(0x04), // 0x04 >> 2 = 1 + makeSKV(0xFC), // 0xFC >> 2 = 63 } - buckets := partitionByChildIndex(kvs) + buckets := partitionByChildIndex(skvs) assert.Len(t, buckets[0], 1) assert.Len(t, buckets[1], 1) @@ -69,10 +69,10 @@ func TestChildPosition(t *testing.T) { func TestAssignToWorkers(t *testing.T) { // 3 non-empty buckets, 2 workers. 
- var buckets [64][]core.KeyValue - buckets[0] = []core.KeyValue{{Key: makeKVKey(0x00), Value: makeKVVal(1)}} - buckets[10] = []core.KeyValue{{Key: makeKVKey(0x28), Value: makeKVVal(2)}} // 0x28>>2=10 - buckets[63] = []core.KeyValue{{Key: makeKVKey(0xFC), Value: makeKVVal(3)}} + var buckets [64][]core.StemKeyValue + buckets[0] = []core.StemKeyValue{makeSKV(0x00)} + buckets[10] = []core.StemKeyValue{makeSKV(0x28)} // 0x28>>2=10 + buckets[63] = []core.StemKeyValue{makeSKV(0xFC)} tasks := assignToWorkers(buckets, 2) require.Len(t, tasks, 2) @@ -85,9 +85,9 @@ func TestAssignToWorkers(t *testing.T) { } func TestAssignToWorkersMoreWorkersThanChildren(t *testing.T) { - var buckets [64][]core.KeyValue - buckets[5] = []core.KeyValue{{Key: makeKVKey(0x14), Value: makeKVVal(1)}} // 0x14>>2=5 - buckets[6] = []core.KeyValue{{Key: makeKVKey(0x18), Value: makeKVVal(2)}} // 0x18>>2=6 + var buckets [64][]core.StemKeyValue + buckets[5] = []core.StemKeyValue{makeSKV(0x14)} // 0x14>>2=5 + buckets[6] = []core.StemKeyValue{makeSKV(0x18)} // 0x18>>2=6 tasks := assignToWorkers(buckets, 8) // Only 2 non-empty, so cap to 2 workers. @@ -118,99 +118,152 @@ func memoryPageSetFactory() PageSet { return &permissivePageSet{NewMemoryPageSet(true)} } +// expectedWorkerRoot computes the expected root hash matching the depth-7 +// child-index partitioning used by both singleThreadedUpdate and ParallelUpdate. +// This differs from expectedRoot (which splits at depth 1) because without +// leaf compaction, the splitting depth affects intermediate hashes. +func expectedWorkerRoot(skvs []core.StemKeyValue) core.Node { + if len(skvs) == 0 { + return core.Terminator + } + + // Partition into 128 subtree roots (64 child indices × 2 sides). 
+ buckets := partitionByChildIndex(skvs) + var roots [128]core.Node + for ci := range 64 { + if len(buckets[ci]) == 0 { + continue + } + var leftKVs, rightKVs []core.StemKeyValue + for i := range buckets[ci] { + if (buckets[ci][i].Stem[0]>>1)&1 == 0 { + leftKVs = append(leftKVs, buckets[ci][i]) + } else { + rightKVs = append(rightKVs, buckets[ci][i]) + } + } + if len(leftKVs) > 0 { + roots[ci*2] = core.BuildInternalTree(7, leftKVs, func(_ core.WriteNode) {}) + } + if len(rightKVs) > 0 { + roots[ci*2+1] = core.BuildInternalTree(7, rightKVs, func(_ core.WriteNode) {}) + } + } + + // Hash up 7 levels: 128 → 64 → 32 → 16 → 8 → 4 → 2 → 1. + nodes := make([]core.Node, 128) + copy(nodes, roots[:]) + for len(nodes) > 1 { + half := len(nodes) / 2 + next := make([]core.Node, half) + for i := range half { + left := nodes[i*2] + right := nodes[i*2+1] + if core.IsTerminator(&left) && core.IsTerminator(&right) { + next[i] = core.Terminator + } else { + next[i] = core.HashInternal(&core.InternalData{Left: left, Right: right}) + } + } + nodes = next + } + + return nodes[0] +} + func TestParallelUpdateEmpty(t *testing.T) { out := ParallelUpdate(core.Terminator, nil, 4, memoryPageSetFactory) assert.Equal(t, core.Terminator, out.Root) } func TestParallelUpdateSingleKey(t *testing.T) { - kv := core.KeyValue{Key: makeKVKey(0x50), Value: makeKVVal(1)} - kvs := []core.KeyValue{kv} + skv := makeSKV(0x50) + skvs := []core.StemKeyValue{skv} - out := ParallelUpdate(core.Terminator, kvs, 4, memoryPageSetFactory) - expected := expectedRoot(kvs) + out := ParallelUpdate(core.Terminator, skvs, 4, memoryPageSetFactory) + expected := expectedWorkerRoot(skvs) assert.Equal(t, expected, out.Root) } func TestParallelUpdateTwoKeysDifferentChildren(t *testing.T) { // 0x00 → child 0, 0x80 → child 32. 
- kvs := []core.KeyValue{ - {Key: makeKVKey(0x00), Value: makeKVVal(1)}, - {Key: makeKVKey(0x80), Value: makeKVVal(2)}, + skvs := []core.StemKeyValue{ + makeSKV(0x00), + makeSKV(0x80), } - out := ParallelUpdate(core.Terminator, kvs, 4, memoryPageSetFactory) - expected := expectedRoot(kvs) + out := ParallelUpdate(core.Terminator, skvs, 4, memoryPageSetFactory) + expected := expectedWorkerRoot(skvs) assert.Equal(t, expected, out.Root) } func TestParallelUpdateSparseChildren(t *testing.T) { // Only children 0 and 63 have ops. - kvs := []core.KeyValue{ - {Key: makeKVKey(0x00), Value: makeKVVal(1)}, - {Key: makeKVKey(0xFC), Value: makeKVVal(2)}, + skvs := []core.StemKeyValue{ + makeSKV(0x00), + makeSKV(0xFC), } - out := ParallelUpdate(core.Terminator, kvs, 4, memoryPageSetFactory) - expected := expectedRoot(kvs) + out := ParallelUpdate(core.Terminator, skvs, 4, memoryPageSetFactory) + expected := expectedWorkerRoot(skvs) assert.Equal(t, expected, out.Root) } func TestParallelUpdateSingleChild(t *testing.T) { - // All keys land in child 0 (first 6 bits = 000000). - kvs := []core.KeyValue{ - {Key: makeKVKey(0x00), Value: makeKVVal(1)}, - {Key: makeKVKey(0x01), Value: makeKVVal(2)}, - {Key: makeKVKey(0x02), Value: makeKVVal(3)}, - {Key: makeKVKey(0x03), Value: makeKVVal(4)}, + // All stems land in child 0 (first 6 bits = 000000). + skvs := []core.StemKeyValue{ + makeSKV(0x00), + makeSKV(0x01), + makeSKV(0x02), + makeSKV(0x03), } - sort.Slice(kvs, func(i, j int) bool { return kvLess(&kvs[i], &kvs[j]) }) + sort.Slice(skvs, func(i, j int) bool { return skvLess(&skvs[i], &skvs[j]) }) - out := ParallelUpdate(core.Terminator, kvs, 4, memoryPageSetFactory) - expected := expectedRoot(kvs) + out := ParallelUpdate(core.Terminator, skvs, 4, memoryPageSetFactory) + expected := expectedWorkerRoot(skvs) assert.Equal(t, expected, out.Root) } func TestParallelUpdateFallbackSmallBatch(t *testing.T) { // Less than 64 ops → single-threaded fallback. 
- kvs := randomKVs(10, 42) - out := ParallelUpdate(core.Terminator, kvs, 8, memoryPageSetFactory) - expected := expectedRoot(kvs) + skvs := randomSKVs(10, 42) + out := ParallelUpdate(core.Terminator, skvs, 8, memoryPageSetFactory) + expected := expectedWorkerRoot(skvs) assert.Equal(t, expected, out.Root) } func TestParallelUpdateDeterministic(t *testing.T) { - kvs := randomKVs(200, 99) + skvs := randomSKVs(200, 99) - r1 := ParallelUpdate(core.Terminator, kvs, 4, memoryPageSetFactory).Root - r2 := ParallelUpdate(core.Terminator, kvs, 4, memoryPageSetFactory).Root + r1 := ParallelUpdate(core.Terminator, skvs, 4, memoryPageSetFactory).Root + r2 := ParallelUpdate(core.Terminator, skvs, 4, memoryPageSetFactory).Root assert.Equal(t, r1, r2, "same inputs should produce same root") } func TestParallelUpdateMatchesSingleThreaded(t *testing.T) { tests := []struct { name string - numKVs int + numSKVs int workers int }{ - {"1kv_2w", 1, 2}, - {"10kv_2w", 10, 2}, - {"100kv_2w", 100, 2}, - {"100kv_4w", 100, 4}, - {"100kv_8w", 100, 8}, - {"500kv_4w", 500, 4}, - {"1000kv_8w", 1000, 8}, + {"1skv_2w", 1, 2}, + {"10skv_2w", 10, 2}, + {"100skv_2w", 100, 2}, + {"100skv_4w", 100, 4}, + {"100skv_8w", 100, 8}, + {"500skv_4w", 500, 4}, + {"1000skv_8w", 1000, 8}, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - kvs := randomKVs(tc.numKVs, 12345) + skvs := randomSKVs(tc.numSKVs, 12345) single := singleThreadedUpdate( - core.Terminator, kvs, NewMemoryPageSet(true), + core.Terminator, skvs, memoryPageSetFactory(), ) parallel := ParallelUpdate( - core.Terminator, kvs, tc.workers, memoryPageSetFactory, + core.Terminator, skvs, tc.workers, memoryPageSetFactory, ) assert.Equal(t, single.Root, parallel.Root, @@ -221,36 +274,38 @@ func TestParallelUpdateMatchesSingleThreaded(t *testing.T) { // --- helpers --- -func randomKVs(n int, seed int64) []core.KeyValue { +func randomSKVs(n int, seed int64) []core.StemKeyValue { rng := rand.New(rand.NewSource(seed)) - kvs := 
make([]core.KeyValue, n) - seen := make(map[core.KeyPath]bool, n) + skvs := make([]core.StemKeyValue, n) + seen := make(map[core.StemPath]bool, n) for i := range n { for { - var kp core.KeyPath - rng.Read(kp[:]) - if seen[kp] { + var stem core.StemPath + rng.Read(stem[:]) + if seen[stem] { continue } - seen[kp] = true - var vh core.ValueHash - rng.Read(vh[:]) - kvs[i] = core.KeyValue{Key: kp, Value: vh} + seen[stem] = true + var hash core.Node + rng.Read(hash[:]) + // Ensure non-zero hash (avoid terminator). + hash[0] |= 0x01 + skvs[i] = core.StemKeyValue{Stem: stem, Hash: hash} break } } - sort.Slice(kvs, func(i, j int) bool { return kvLess(&kvs[i], &kvs[j]) }) - return kvs + sort.Slice(skvs, func(i, j int) bool { return skvLess(&skvs[i], &skvs[j]) }) + return skvs } -func kvLess(a, b *core.KeyValue) bool { - for i := range a.Key { - if a.Key[i] < b.Key[i] { +func skvLess(a, b *core.StemKeyValue) bool { + for i := range a.Stem { + if a.Stem[i] < b.Stem[i] { return true } - if a.Key[i] > b.Key[i] { + if a.Stem[i] > b.Stem[i] { return false } }