go-ethereum/nomt/db/db.go
weiihann cb3e13d93d nomt/merkle: add Phase 7 parallel workers for trie updates
Parallelize the PageWalker trie update across multiple goroutines by
partitioning sorted operations by the root page's 64 child subtrees
(first 6 bits of each key path).

Each worker runs an independent PageWalker constrained to child pages
below the root (using parentPage mechanism), producing ChildPageRoots.
After all workers complete, a root walker places the child roots using
AdvanceAndPlaceNode and concludes with the final trie root.

Workers operate on disjoint page subtrees so no synchronization is
needed during computation — only sync.WaitGroup for goroutine join.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-12 18:37:42 +08:00

273 lines
6.3 KiB
Go

// Package db provides the unified NOMT trie database combining Bitbox
// storage with the PageWalker merkle engine.
//
// This package handles only the trie structure (merkle pages). Flat
// key-value storage (accounts, storage slots) stays on geth's PebbleDB.
package db
import (
"crypto/rand"
"fmt"
"os"
"path/filepath"
"runtime"
"sort"
"sync"
"github.com/ethereum/go-ethereum/nomt/bitbox"
"github.com/ethereum/go-ethereum/nomt/core"
"github.com/ethereum/go-ethereum/nomt/merkle"
)
// File names used inside the database directory. Each one is joined onto the
// data directory with filepath.Join before it is used.
const (
// htFileName names the file handed to bitbox.Create and bitbox.Open.
htFileName = "nomt.ht"
// walFileName names the file handed to Recover (in Open) and FullSync (in
// Update).
walFileName = "nomt.wal"
)
// Config holds configuration for the NOMT database.
type Config struct {
// HTCapacity is the number of hash table buckets. Must be a power of 2.
// Open only passes it to bitbox.Create, i.e. it is read when a new
// hash-table file is created and ignored when an existing one is opened.
HTCapacity uint64
// NumWorkers is the number of parallel goroutines for trie updates.
// Open substitutes runtime.NumCPU() when the value is zero or negative.
NumWorkers int
}
// DefaultConfig returns the configuration used when the caller has no
// specific requirements.
//
// HTCapacity is set to 1<<20 (roughly one million buckets; the previous
// comment estimated this at ~4GB, which cannot be verified from this file).
// NumWorkers is left at zero, so Open falls back to runtime.NumCPU().
func DefaultConfig() Config {
	var cfg Config
	cfg.HTCapacity = 1 << 20
	return cfg
}
// DB is the NOMT trie database.
//
// Root, SetRoot, SyncSeqn and Update acquire mu; LoadPage and Close do not.
type DB struct {
// dataDir is the directory given to Open; Update joins walFileName onto it.
dataDir string
// bb is the Bitbox store that pages are loaded from and synced to.
bb *bitbox.DB
// root is the current trie root. Open initialises it to core.Terminator;
// SetRoot and a successful Update replace it.
root core.Node
// syncSeqn is seeded from the value returned by Recover in Open and is
// incremented by Update before each FullSync call.
syncSeqn uint32
// numWorkers is the worker count forwarded to merkle.ParallelUpdate.
numWorkers int
// mu guards root and syncSeqn.
mu sync.RWMutex
}
// Open opens the NOMT trie database stored under dataDir, creating the
// directory (mode 0755) and a new Bitbox hash-table file when they do not
// exist yet.
//
// After the store is available, WAL recovery is run against the WAL path in
// the same directory and the sequence number it returns becomes the DB's
// sync sequence number. The returned DB always starts with core.Terminator
// as its root; callers that have a persisted root must install it with
// SetRoot.
//
// config.HTCapacity is only consulted when a new hash-table file is created.
// A config.NumWorkers that is zero or negative is replaced by
// runtime.NumCPU().
func Open(dataDir string, config Config) (*DB, error) {
	if mkErr := os.MkdirAll(dataDir, 0755); mkErr != nil {
		return nil, fmt.Errorf("nomt/db: create datadir: %w", mkErr)
	}

	store, err := openOrCreateBitbox(filepath.Join(dataDir, htFileName), config.HTCapacity)
	if err != nil {
		return nil, err
	}

	workers := config.NumWorkers
	if workers <= 0 {
		workers = runtime.NumCPU()
	}

	recovered, err := store.Recover(filepath.Join(dataDir, walFileName))
	if err != nil {
		// The recovery failure is what gets reported; the result of closing
		// the store is not inspected.
		store.Close()
		return nil, fmt.Errorf("nomt/db: recover: %w", err)
	}

	return &DB{
		dataDir:    dataDir,
		bb:         store,
		root:       core.Terminator,
		syncSeqn:   recovered,
		numWorkers: workers,
	}, nil
}

// openOrCreateBitbox returns a Bitbox handle for htPath, creating the file
// with a random 16-byte seed and the given capacity when os.Stat reports
// that it does not exist, and opening it otherwise.
func openOrCreateBitbox(htPath string, capacity uint64) (*bitbox.DB, error) {
	if _, statErr := os.Stat(htPath); !os.IsNotExist(statErr) {
		existing, err := bitbox.Open(htPath)
		if err != nil {
			return nil, fmt.Errorf("nomt/db: open bitbox: %w", err)
		}
		return existing, nil
	}

	var seed [16]byte
	if _, err := rand.Read(seed[:]); err != nil {
		return nil, fmt.Errorf("nomt/db: generate seed: %w", err)
	}
	created, err := bitbox.Create(htPath, capacity, seed)
	if err != nil {
		return nil, fmt.Errorf("nomt/db: create bitbox: %w", err)
	}
	return created, nil
}
// Root returns the trie root currently held by the DB. The value is read
// under the read lock.
func (db *DB) Root() core.Node {
	db.mu.RLock()
	current := db.root
	db.mu.RUnlock()
	return current
}
// SetRoot replaces the DB's current trie root under the write lock. It is
// meant for installing a root loaded from metadata; nothing is written to
// storage.
func (db *DB) SetRoot(root core.Node) {
	db.mu.Lock()
	db.root = root
	db.mu.Unlock()
}
// SyncSeqn returns the DB's current sync sequence number, read under the
// read lock.
func (db *DB) SyncSeqn() uint32 {
	db.mu.RLock()
	seqn := db.syncSeqn
	db.mu.RUnlock()
	return seqn
}
// Update applies a batch of leaf operations to the trie and returns the
// resulting root.
//
// The function:
//  1. Returns the current root unchanged when ops is empty.
//  2. Sorts ops by key, in place (the caller's slice is reordered).
//  3. Keeps only the ops whose Value is non-nil; ops with a nil Value are
//     dropped and are NOT applied to the trie. If nothing remains, the
//     current root is returned unchanged.
//  4. Runs merkle.ParallelUpdate from the current root with the configured
//     worker count, giving it a factory for Bitbox-backed page sets.
//  5. Increments the sync sequence number and persists the produced pages
//     with FullSync.
//  6. Stores and returns the new root.
//
// On a FullSync failure the stored root is left untouched and
// core.Terminator is returned together with the wrapped error.
//
// NOTE(review): the sequence number is incremented before FullSync and is
// not restored when FullSync fails — confirm this is intended.
func (db *DB) Update(ops []core.LeafOp) (core.Node, error) {
	if len(ops) == 0 {
		return db.Root(), nil
	}

	db.mu.Lock()
	defer db.mu.Unlock()

	// keyLess already reports false for equal keys, so it is a complete
	// strict ordering on its own.
	sort.Slice(ops, func(a, b int) bool {
		return keyLess(&ops[a].Key, &ops[b].Key)
	})

	// Collect the writes; entries without a value are skipped.
	puts := make([]core.KeyValue, 0, len(ops))
	for i := range ops {
		if val := ops[i].Value; val != nil {
			puts = append(puts, core.KeyValue{Key: ops[i].Key, Value: *val})
		}
	}
	if len(puts) == 0 {
		return db.root, nil
	}

	// A new page set is built on every factory call.
	newPageSet := func() merkle.PageSet {
		return newBitboxPageSet(db.bb)
	}
	result := merkle.ParallelUpdate(db.root, puts, db.numWorkers, newPageSet)

	// Persist the pages produced by the update.
	db.syncSeqn++
	walPath := filepath.Join(db.dataDir, walFileName)
	if syncErr := db.bb.FullSync(walPath, db.syncSeqn, result.Pages); syncErr != nil {
		return core.Terminator, fmt.Errorf("nomt/db: sync: %w", syncErr)
	}

	db.root = result.Root
	return result.Root, nil
}
// LoadPage fetches the page identified by pageID from Bitbox storage.
//
// It returns (nil, nil) when the store reports that the page is not present,
// and a wrapped error when the load itself fails. The DB mutex is not taken.
func (db *DB) LoadPage(pageID core.PageID) (*core.RawPage, error) {
	raw, _, present, loadErr := db.bb.LoadPage(pageID)
	switch {
	case loadErr != nil:
		return nil, fmt.Errorf("nomt/db: load page: %w", loadErr)
	case !present:
		return nil, nil
	default:
		return raw, nil
	}
}
// Close closes the underlying Bitbox store and returns its result. The DB
// mutex is not taken here.
func (db *DB) Close() error {
	closeErr := db.bb.Close()
	return closeErr
}
// --- BitboxPageSet ---
// bitboxPageSet implements merkle.PageSet backed by Bitbox disk storage.
//
// Instances are built by newBitboxPageSet. The cache is a plain map and this
// type does no locking of its own.
type bitboxPageSet struct {
// bb is the Bitbox store consulted when a page is not in the cache.
bb *bitbox.DB
// cache maps pageIDKey(pageID) to pages that Get loaded from bb or that
// were stored through Insert.
cache map[string]*core.RawPage
}
// newBitboxPageSet builds a page set that reads from bb and starts with an
// empty cache pre-sized for 16 entries.
func newBitboxPageSet(bb *bitbox.DB) *bitboxPageSet {
	ps := new(bitboxPageSet)
	ps.bb = bb
	ps.cache = make(map[string]*core.RawPage, 16)
	return ps
}
// Get returns a private copy of the page identified by pageID together with
// its origin. The boolean result is always true.
//
// Lookup order:
//   - a cached page is copied and reported as PageOriginPersisted;
//   - otherwise the page is loaded from Bitbox, remembered in the cache,
//     copied and reported as PageOriginPersisted;
//   - if the load fails or the page is absent, a zero-valued page is
//     returned as PageOriginFresh and nothing is cached. This covers a trie
//     that is being built from scratch or is expanding into new regions.
//
// NOTE(review): a LoadPage error is indistinguishable from a missing page
// here because the signature has no error return — confirm that treating a
// read failure as a fresh page is acceptable.
func (ps *bitboxPageSet) Get(pageID core.PageID) (
	*core.RawPage, merkle.PageOrigin, bool,
) {
	cacheKey := pageIDKey(pageID)

	src, hit := ps.cache[cacheKey]
	if !hit {
		loaded, _, found, err := ps.bb.LoadPage(pageID)
		if err != nil || !found {
			return new(core.RawPage), merkle.PageOrigin{Kind: merkle.PageOriginFresh}, true
		}
		ps.cache[cacheKey] = loaded
		src = loaded
	}

	// Hand out a copy so the cached page is not modified through the
	// returned pointer.
	dup := new(core.RawPage)
	*dup = *src
	return dup, merkle.PageOrigin{Kind: merkle.PageOriginPersisted}, true
}
// Contains reports whether a page with the given ID is available, either in
// this set's cache or in Bitbox storage.
//
// Contains has no error return, so a storage read failure is reported as
// "not present". This matches Get, which also treats a failed load like a
// missing page. Unlike Get, a page found in storage is not added to the
// cache.
func (ps *bitboxPageSet) Contains(pageID core.PageID) bool {
	if _, cached := ps.cache[pageIDKey(pageID)]; cached {
		return true
	}
	_, _, found, err := ps.bb.LoadPage(pageID)
	if err != nil {
		// Do not trust found when the load itself failed.
		return false
	}
	return found
}
// Fresh returns a newly allocated, zero-valued page. The pageID argument is
// not consulted and the cache is left untouched.
func (ps *bitboxPageSet) Fresh(pageID core.PageID) *core.RawPage {
	var blank core.RawPage
	return &blank
}
// Insert stores page in the cache under pageID, replacing any existing
// entry. The pointer is stored as given (no copy is made) and the origin
// argument is not recorded.
func (ps *bitboxPageSet) Insert(
	pageID core.PageID, page *core.RawPage, origin merkle.PageOrigin,
) {
	cacheKey := pageIDKey(pageID)
	ps.cache[cacheKey] = page
}
// pageIDKey converts a page ID into the string used as the cache map key,
// built from the bytes returned by id.Encode().
func pageIDKey(id core.PageID) string {
	enc := id.Encode()
	view := enc[:]
	return string(view)
}
// keyLess reports whether key path a orders strictly before b, comparing
// element by element from index 0. The first differing position decides the
// result; equal key paths yield false.
func keyLess(a, b *core.KeyPath) bool {
	for idx := range a {
		if lhs, rhs := a[idx], b[idx]; lhs != rhs {
			return lhs < rhs
		}
	}
	return false
}