nomt: optimize Hash() pipeline — pool hashers, eliminate redundant sorts, in-place merge

Performance optimizations to the NOMT storage engine while preserving
correctness (all triecompare cross-validation tests pass at 10K+ scale):

- Pool SHA256 hashers via sync.Pool in HashInternal and HashStem
- Replace allStems map with sorted slice + O(N+M) merge (in-place fast
  path for incremental updates avoids allocation entirely)
- Add UpdateSorted to db.DB, skipping redundant sort of pre-sorted ops
- Simplify canonicalRoot to use pre-sorted allStems directly
- Optimize StemSharedBits with byte-level XOR + bits.LeadingZeros8
- Replace stemLess loops with bytes.Compare in all locations
- Eliminate per-stem map alloc in groupAndHashStems (use [256]bool dirty)
- Use stack-allocated [248]bool for downBits in BuildInternalTree
- Remove unused stemPathCmp function

BenchmarkHash/10000/nomt: 9.8ms → 8.2ms (-16%)
BenchmarkBlockWorkload/nomt: 7.7ms → 6.6ms (-14%)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
weiihann 2026-02-13 01:11:59 +08:00
parent d61dd875d8
commit 036e37809e
6 changed files with 164 additions and 98 deletions

View file

@ -1,6 +1,10 @@
package core
import "crypto/sha256"
import (
"crypto/sha256"
"hash"
"sync"
)
const (
// StemSize is the number of bytes in a stem path (248 bits).
@ -13,13 +17,19 @@ const (
HashSize = 32
)
// sha256Pool recycles SHA-256 hash states so hot hashing paths do not
// allocate a fresh hasher per call. Hashers are put back without being
// reset, so a caller must Reset one obtained from Get before writing to it.
var sha256Pool = sync.Pool{New: func() any {
	h := sha256.New()
	return h
}}
// HashInternal computes SHA256(left || right) matching EIP-7864's InternalNode.Hash().
//
// The hasher is borrowed from sha256Pool rather than allocated per call. It is
// reset right after Get because pooled hashers are returned without being
// reset, and it is handed back to the pool once the digest has been written
// into the result.
func HashInternal(data *InternalData) Node {
	h := sha256Pool.Get().(hash.Hash)
	h.Reset()
	h.Write(data.Left[:])
	h.Write(data.Right[:])

	var out Node
	// Sum appends to out[:0], so the digest lands directly in out's storage.
	h.Sum(out[:0])
	sha256Pool.Put(h)
	return out
}
@ -38,7 +48,7 @@ func HashStem(stem [StemSize]byte, values [StemNodeWidth][]byte) Node {
}
}
h := sha256.New()
h := sha256Pool.Get().(hash.Hash)
for level := 1; level <= 8; level++ {
for i := range StemNodeWidth / (1 << level) {
if data[i*2] == (Node{}) && data[i*2+1] == (Node{}) {
@ -58,5 +68,6 @@ func HashStem(stem [StemSize]byte, values [StemNodeWidth][]byte) Node {
h.Write(data[0][:])
var out Node
h.Sum(out[:0])
sha256Pool.Put(h)
return out
}

View file

@ -1,5 +1,7 @@
package core
import "math/bits"
// StemKeyValue is a resolved (stemPath, stemHash) pair for the page tree.
// The stem hash is precomputed by the integration layer using HashStem.
type StemKeyValue struct {
@ -32,17 +34,26 @@ type WriteNode struct {
// StemSharedBits counts the number of shared prefix bits between two stem
// paths, starting after `skip` bits.
func StemSharedBits(a, b *StemPath, skip int) int {
count := 0
maxBits := StemSize * 8 // 248
for i := skip; i < maxBits; i++ {
aBit := (a[i/8] >> (7 - i%8)) & 1
bBit := (b[i/8] >> (7 - i%8)) & 1
if aBit != bBit {
break
startByte := skip / 8
// Handle partial first byte if skip is not byte-aligned.
if skip%8 != 0 {
mask := byte(0xFF >> (skip % 8))
xor := (a[startByte] ^ b[startByte]) & mask
if xor != 0 {
return bits.LeadingZeros8(xor) - (skip % 8)
}
count++
startByte++
}
return count
// Compare full bytes.
for i := startByte; i < StemSize; i++ {
xor := a[i] ^ b[i]
if xor != 0 {
return i*8 + bits.LeadingZeros8(xor) - skip
}
}
return StemSize*8 - skip
}
// BuildInternalTree builds a compact internal-node sub-trie from sorted
@ -132,9 +143,10 @@ func BuildInternalTree(skip int, ops []StemKeyValue, visit func(WriteNode)) Node
}
stemEndBit := skip + stemDepth
var downBuf [StemSize * 8]bool
var downBits []bool
if stemEndBit > downStart {
downBits = make([]bool, stemEndBit-downStart)
downBits = downBuf[:stemEndBit-downStart]
for i := downStart; i < stemEndBit; i++ {
downBits[i-downStart] = stemBitAt(thisStem, i)
}
@ -191,15 +203,3 @@ func BuildInternalTree(skip int, ops []StemKeyValue, visit func(WriteNode)) Node
// stemBitAt reports whether bit idx of stem is set. Bits are numbered from
// the most significant bit of byte 0, so idx 0 is the top bit of stem[0].
func stemBitAt(stem *StemPath, idx int) bool {
	byteIdx := idx / 8
	shift := uint(7 - idx%8)
	return stem[byteIdx]&(byte(1)<<shift) != 0
}
// stemPathCmp orders two stem paths lexicographically by byte value.
// It returns -1 if a sorts before b, 1 if a sorts after b, and 0 if equal.
func stemPathCmp(a, b *StemPath) int {
	for i, av := range a {
		if bv := b[i]; av != bv {
			if av < bv {
				return -1
			}
			return 1
		}
	}
	return 0
}

View file

@ -6,6 +6,7 @@
package db
import (
"bytes"
"crypto/rand"
"fmt"
"os"
@ -127,14 +128,18 @@ func (db *DB) SyncSeqn() uint32 {
return db.syncSeqn
}
// Update applies a batch of stem key-value pairs to the trie.
//
// The pairs do not need to be ordered: ops is sorted in place by stem path
// (so the caller's slice is reordered) and then handed to UpdateSorted, whose
// result is returned unchanged.
func (db *DB) Update(ops []core.StemKeyValue) (core.Node, error) {
	byStem := func(i, j int) bool {
		return stemLess(&ops[i].Stem, &ops[j].Stem)
	}
	sort.Slice(ops, byStem)
	return db.UpdateSorted(ops)
}
// UpdateSorted applies a pre-sorted batch of stem key-value pairs to the trie.
// The caller must ensure ops are sorted by stem path.
func (db *DB) UpdateSorted(ops []core.StemKeyValue) (core.Node, error) {
if len(ops) == 0 {
return db.Root(), nil
}
@ -142,11 +147,6 @@ func (db *DB) Update(ops []core.StemKeyValue) (core.Node, error) {
db.mu.Lock()
defer db.mu.Unlock()
// Sort by stem path.
sort.Slice(ops, func(i, j int) bool {
return stemLess(&ops[i].Stem, &ops[j].Stem)
})
pageSetFactory := func() merkle.PageSet {
return newBitboxPageSet(db.bb)
}
@ -249,13 +249,5 @@ func pageIDKey(id core.PageID) string {
}
// stemLess reports whether stem path a sorts strictly before b in
// lexicographic byte order.
func stemLess(a, b *core.StemPath) bool {
	return bytes.Compare(a[:], b[:]) < 0
}

View file

@ -1,6 +1,7 @@
package nomttrie
import (
"bytes"
"sort"
"github.com/ethereum/go-ethereum/ethdb"
@ -54,16 +55,19 @@ func loadStemValues(diskdb ethdb.Database, stem core.StemPath) ([core.StemNodeWi
}
// writeStemValues writes updated stem values to an ethdb batch.
// Nil values delete the key; non-nil values overwrite.
func writeStemValues(batch ethdb.Batch, stem core.StemPath, updates map[byte][]byte) error {
for suffix, value := range updates {
key := stemValueDBKey(stem, suffix)
if value == nil {
// Only slots marked dirty are written. Nil values delete the key.
func writeStemValues(batch ethdb.Batch, stem core.StemPath, values [core.StemNodeWidth][]byte, dirty [core.StemNodeWidth]bool) error {
for i, d := range dirty {
if !d {
continue
}
key := stemValueDBKey(stem, byte(i))
if values[i] == nil {
if err := batch.Delete(key); err != nil {
return err
}
} else {
if err := batch.Put(key, value); err != nil {
if err := batch.Put(key, values[i]); err != nil {
return err
}
}
@ -110,16 +114,16 @@ func groupAndHashStems(
}
// Apply updates.
flatUpdates := make(map[byte][]byte, 4)
var dirty [core.StemNodeWidth]bool
for idx < len(updates) && updates[idx].Stem == stem {
u := updates[idx]
values[u.Suffix] = u.Value
flatUpdates[u.Suffix] = u.Value
dirty[u.Suffix] = true
idx++
}
// Write to flat state.
if err := writeStemValues(batch, stem, flatUpdates); err != nil {
if err := writeStemValues(batch, stem, values, dirty); err != nil {
return nil, err
}
@ -147,13 +151,5 @@ func groupAndHashStems(
// stemLess compares two stem paths lexicographically and reports whether a
// sorts strictly before b.
func stemLess(a, b *core.StemPath) bool {
	return bytes.Compare(a[:], b[:]) < 0
}

View file

@ -59,13 +59,17 @@ func TestWriteStemValues(t *testing.T) {
var stem core.StemPath
stem[0] = 0xCC
// Write a value.
// Write a value at slot 3.
val := make([]byte, 32)
val[0] = 0x42
var values [core.StemNodeWidth][]byte
var dirty [core.StemNodeWidth]bool
values[3] = val
dirty[3] = true
batch := diskdb.NewBatch()
updates := map[byte][]byte{3: val}
require.NoError(t, writeStemValues(batch, stem, updates))
require.NoError(t, writeStemValues(batch, stem, values, dirty))
require.NoError(t, batch.Write())
// Verify it was written.
@ -74,9 +78,11 @@ func TestWriteStemValues(t *testing.T) {
assert.Equal(t, val, data)
// Delete it.
values[3] = nil
dirty[3] = true
batch = diskdb.NewBatch()
deletes := map[byte][]byte{3: nil}
require.NoError(t, writeStemValues(batch, stem, deletes))
require.NoError(t, writeStemValues(batch, stem, values, dirty))
require.NoError(t, batch.Write())
has, err := diskdb.Has(stemValueDBKey(stem, 3))

View file

@ -7,8 +7,8 @@
package nomttrie
import (
"bytes"
"encoding/binary"
"sort"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/types"
@ -47,20 +47,23 @@ type NomtTrie struct {
pending []stemUpdate // accumulated stem updates
dirty bool // whether pending updates exist
// allStems tracks the stem hash for every active stem in the trie.
// Updated on each Hash() with results from groupAndHashStems.
// Used to compute the canonical root via BuildInternalTree(skip=0).
allStems map[core.StemPath]core.Node
// allStems tracks the stem hash for every active stem in the trie,
// kept sorted by stem path. Updated on each Hash() via sorted merge
// with results from groupAndHashStems.
allStems []core.StemKeyValue
// mergeBuf is reused across Hash() calls to avoid allocating a new
// slice on every merge. After merge, allStems and mergeBuf swap roles.
mergeBuf []core.StemKeyValue
}
// New creates a new NomtTrie. The root parameter is the current state root.
//
// The trie shares the backend's NOMT database handle. allStems and mergeBuf
// are left as nil slices; they are populated by the first Hash() that has
// stem updates to merge.
func New(root common.Hash, backend *nomtdb.Database) (*NomtTrie, error) {
	return &NomtTrie{
		nomtDB:  backend.NomtDB(),
		backend: backend,
		root:    root,
		pending: make([]stemUpdate, 0, 64),
	}, nil
}
@ -234,22 +237,23 @@ func (t *NomtTrie) Hash() common.Hash {
return t.root
}
// Update allStems with new/changed stem hashes.
for _, kv := range stemKVs {
t.allStems[kv.Stem] = kv.Hash
}
// Merge sorted stemKVs into allStems (both are sorted by stem path).
// Swap allStems and mergeBuf to reuse backing arrays across calls.
merged := mergeStemKVs(t.allStems, stemKVs, t.mergeBuf)
t.mergeBuf = t.allStems
t.allStems = merged
// Update the page tree for persistent storage.
// stemKVs is already sorted, so skip the redundant sort in db.Update.
if len(stemKVs) > 0 {
if _, err := t.nomtDB.Update(stemKVs); err != nil {
if _, err := t.nomtDB.UpdateSorted(stemKVs); err != nil {
log.Error("NOMT page tree update failed", "err", err)
return t.root
}
}
// Compute the canonical root via BuildInternalTree(skip=0).
// This produces roots identical to bintrie by avoiding the depth-7
// worker split that adds extra wrapping levels.
// allStems is already sorted, so no additional sort needed.
t.root = common.Hash(t.canonicalRoot())
t.pending = t.pending[:0]
@ -258,19 +262,78 @@ func (t *NomtTrie) Hash() common.Hash {
}
// canonicalRoot computes the bintrie-compatible root hash from all known stems
// using BuildInternalTree at skip=0.
// using BuildInternalTree at skip=0. allStems is already sorted.
func (t *NomtTrie) canonicalRoot() core.Node {
if len(t.allStems) == 0 {
return core.Terminator
}
sorted := make([]core.StemKeyValue, 0, len(t.allStems))
for stem, hash := range t.allStems {
sorted = append(sorted, core.StemKeyValue{Stem: stem, Hash: hash})
return core.BuildInternalTree(0, t.allStems, func(_ core.WriteNode) {})
}
// mergeStemKVs merges sorted new stemKVs (updates) into sorted existing
// allStems. Existing entries with the same stem are replaced. The result is
// sorted.
//
// Both inputs must be sorted by stem path. The returned slice is one of:
//   - existing itself, when updates is empty or every update replaces an
//     entry that is already present (hashes are overwritten in place);
//   - updates itself, when existing is empty;
//   - a slice built in buf (or in a fresh allocation when buf is too small
//     or shares storage with an input), when new stems must be inserted.
//
// The buf parameter is reused for the result to avoid allocation when new
// stems need to be inserted.
func mergeStemKVs(existing, updates, buf []core.StemKeyValue) []core.StemKeyValue {
	if len(updates) == 0 {
		return existing
	}
	if len(existing) == 0 {
		return updates
	}

	// Fast path: check if all updates are in-place replacements (no new stems).
	// This is the common case for incremental block updates where accounts
	// already exist in the trie.
	allInPlace := true
	ei := 0
	for _, u := range updates {
		for ei < len(existing) && bytes.Compare(existing[ei].Stem[:], u.Stem[:]) < 0 {
			ei++
		}
		if ei >= len(existing) || existing[ei].Stem != u.Stem {
			allInPlace = false
			break
		}
	}

	if allInPlace {
		// Update hashes in place — zero allocation. The scan above proved
		// every update stem is present, so the inner loop always terminates.
		ei = 0
		for _, u := range updates {
			for existing[ei].Stem != u.Stem {
				ei++
			}
			existing[ei].Hash = u.Hash
		}
		return existing
	}

	// Slow path: some new stems need inserting. Use merge with buffer.
	needed := len(existing) + len(updates)

	// The caller swaps its allStems and mergeBuf after every merge. After an
	// early return of existing, both refer to the same backing array, and
	// merging into buf[:0] would then overwrite existing entries before they
	// are read. Detect shared storage and fall back to a fresh allocation.
	aliased := cap(buf) > 0 &&
		(&buf[:1][0] == &existing[0] || &buf[:1][0] == &updates[0])
	if cap(buf) < needed || aliased {
		buf = make([]core.StemKeyValue, 0, needed)
	}
	result := buf[:0]

	i, j := 0, 0
	for i < len(existing) && j < len(updates) {
		cmp := bytes.Compare(existing[i].Stem[:], updates[j].Stem[:])
		switch {
		case cmp < 0:
			result = append(result, existing[i])
			i++
		case cmp > 0:
			result = append(result, updates[j])
			j++
		default:
			// Same stem: the update supersedes the existing entry.
			result = append(result, updates[j])
			i++
			j++
		}
	}
	result = append(result, existing[i:]...)
	result = append(result, updates[j:]...)
	return result
}
// Commit flushes pending operations and returns the root hash.
@ -304,10 +367,8 @@ func (t *NomtTrie) IsVerkle() bool {
func (t *NomtTrie) Copy() *NomtTrie {
pending := make([]stemUpdate, len(t.pending))
copy(pending, t.pending)
allStems := make(map[core.StemPath]core.Node, len(t.allStems))
for k, v := range t.allStems {
allStems[k] = v
}
allStems := make([]core.StemKeyValue, len(t.allStems))
copy(allStems, t.allStems)
return &NomtTrie{
nomtDB: t.nomtDB,
backend: t.backend,