mirror of
https://github.com/ethereum/go-ethereum.git
synced 2026-06-21 22:24:32 +00:00
nomt/bitbox: add Phase 3 Bitbox on-disk hash table storage
Implement the on-disk open-addressing hash table for storing trie pages: - htfile.go: HT file layout with header, meta pages, and data pages - metamap.go: in-memory meta byte map with dirty page tracking - probe.go: triangular probing with xxhash64 page ID hashing - db.go: Bitbox DB with StorePage, LoadPage, DeletePage, FlushMeta, Sync The hash table uses 1-byte meta tags (top 7 bits of hash) for fast filtering before reading full 4096-byte data pages. Triangular probing with power-of-2 capacity guarantees all buckets are visited. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
88fd10529f
commit
fef1ed4c4f
5 changed files with 1009 additions and 0 deletions
399
nomt/bitbox/bitbox_test.go
Normal file
399
nomt/bitbox/bitbox_test.go
Normal file
|
|
@ -0,0 +1,399 @@
|
|||
package bitbox
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/ethereum/go-ethereum/nomt/core"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// --- HT File Layout Tests ---
|
||||
|
||||
func TestHTOffsetsMetaByteOffset(t *testing.T) {
|
||||
offsets := NewHTOffsets(8192)
|
||||
assert.Equal(t, int64(pageSize), offsets.MetaByteOffset(0))
|
||||
assert.Equal(t, int64(pageSize+1), offsets.MetaByteOffset(1))
|
||||
}
|
||||
|
||||
func TestHTOffsetsDataPageOffset(t *testing.T) {
|
||||
// capacity=4096 → 1 meta page
|
||||
offsets := NewHTOffsets(4096)
|
||||
assert.Equal(t, uint64(1), offsets.MetaPages)
|
||||
|
||||
// Data starts at: header(4096) + 1 meta page(4096) = 8192
|
||||
assert.Equal(t, int64(8192), offsets.DataPageOffset(0))
|
||||
assert.Equal(t, int64(8192+4096), offsets.DataPageOffset(1))
|
||||
}
|
||||
|
||||
func TestHTOffsetsTotalFileSize(t *testing.T) {
|
||||
offsets := NewHTOffsets(4096)
|
||||
// header(4096) + 1 meta page(4096) + 4096 data pages * 4096
|
||||
expected := int64(4096 + 4096 + 4096*4096)
|
||||
assert.Equal(t, expected, offsets.TotalFileSize())
|
||||
}
|
||||
|
||||
func TestHTOffsetsMetaPagesRoundup(t *testing.T) {
|
||||
offsets := NewHTOffsets(5000)
|
||||
assert.Equal(t, uint64(2), offsets.MetaPages)
|
||||
}
|
||||
|
||||
func TestCreateOpenHTFile(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "test.ht")
|
||||
|
||||
seed := HashSeedFromUint64(42, 99)
|
||||
f, offsets, err := CreateHTFile(path, 1024, seed)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, uint64(1024), offsets.Capacity)
|
||||
f.Close()
|
||||
|
||||
f2, offsets2, seed2, occ, err := OpenHTFile(path)
|
||||
require.NoError(t, err)
|
||||
defer f2.Close()
|
||||
|
||||
assert.Equal(t, seed, seed2)
|
||||
assert.Equal(t, uint64(1024), offsets2.Capacity)
|
||||
assert.Equal(t, uint64(0), occ)
|
||||
}
|
||||
|
||||
// --- Meta Byte Tests ---
|
||||
|
||||
func TestMetaByteEncoding(t *testing.T) {
|
||||
assert.True(t, IsEmpty(MetaEmpty))
|
||||
assert.False(t, IsOccupied(MetaEmpty))
|
||||
assert.False(t, IsTombstone(MetaEmpty))
|
||||
|
||||
assert.True(t, IsTombstone(MetaTombstone))
|
||||
assert.False(t, IsEmpty(MetaTombstone))
|
||||
assert.False(t, IsOccupied(MetaTombstone))
|
||||
|
||||
occupied := MakeOccupied(0xFFFFFFFFFFFFFFFF)
|
||||
assert.True(t, IsOccupied(occupied))
|
||||
assert.False(t, IsEmpty(occupied))
|
||||
assert.False(t, IsTombstone(occupied))
|
||||
}
|
||||
|
||||
func TestMetaByteTagMatching(t *testing.T) {
|
||||
hash := uint64(0xABCDEF1234567890)
|
||||
meta := MakeOccupied(hash)
|
||||
assert.True(t, TagMatches(meta, hash))
|
||||
|
||||
// Different high bits should not match.
|
||||
differentHash := uint64(0x1234EF1234567890)
|
||||
assert.False(t, TagMatches(meta, differentHash))
|
||||
}
|
||||
|
||||
func TestMetaMapSetGet(t *testing.T) {
|
||||
mm := NewMetaMap(8192)
|
||||
assert.Equal(t, MetaEmpty, mm.Get(0))
|
||||
|
||||
mm.Set(100, MakeOccupied(12345))
|
||||
assert.True(t, IsOccupied(mm.Get(100)))
|
||||
}
|
||||
|
||||
func TestMetaMapDirtyTracking(t *testing.T) {
|
||||
mm := NewMetaMap(8192) // 2 meta pages
|
||||
assert.Empty(t, mm.DirtyMetaPages())
|
||||
|
||||
mm.Set(0, MetaTombstone) // page 0
|
||||
mm.Set(5000, MetaTombstone) // page 1
|
||||
|
||||
dirty := mm.DirtyMetaPages()
|
||||
assert.Len(t, dirty, 2)
|
||||
assert.Contains(t, dirty, uint64(0))
|
||||
assert.Contains(t, dirty, uint64(1))
|
||||
|
||||
mm.ClearDirty()
|
||||
assert.Empty(t, mm.DirtyMetaPages())
|
||||
}
|
||||
|
||||
// --- Probe Sequence Tests ---
|
||||
|
||||
func TestProbeSequenceInitial(t *testing.T) {
|
||||
p := NewProbeSequence(42, 1024)
|
||||
assert.Equal(t, uint64(42%1024), p.Bucket())
|
||||
assert.Equal(t, uint64(42), p.Hash())
|
||||
}
|
||||
|
||||
func TestProbeSequenceTriangular(t *testing.T) {
|
||||
p := NewProbeSequence(0, 16) // initial bucket = 0
|
||||
assert.Equal(t, uint64(0), p.Bucket())
|
||||
|
||||
p.Next() // step=1 → (0+1)%16 = 1
|
||||
assert.Equal(t, uint64(1), p.Bucket())
|
||||
|
||||
p.Next() // step=2 → (1+2)%16 = 3
|
||||
assert.Equal(t, uint64(3), p.Bucket())
|
||||
|
||||
p.Next() // step=3 → (3+3)%16 = 6
|
||||
assert.Equal(t, uint64(6), p.Bucket())
|
||||
|
||||
p.Next() // step=4 → (6+4)%16 = 10
|
||||
assert.Equal(t, uint64(10), p.Bucket())
|
||||
}
|
||||
|
||||
func TestProbeSequenceVisitsAll(t *testing.T) {
|
||||
// With power-of-2 capacity, triangular probing should visit all buckets.
|
||||
capacity := uint64(16)
|
||||
p := NewProbeSequence(0, capacity)
|
||||
|
||||
visited := make(map[uint64]bool, capacity)
|
||||
for range capacity {
|
||||
visited[p.Bucket()] = true
|
||||
p.Next()
|
||||
}
|
||||
|
||||
assert.Equal(t, int(capacity), len(visited),
|
||||
"triangular probing should visit all buckets")
|
||||
}
|
||||
|
||||
func TestHashPageID(t *testing.T) {
|
||||
seed := HashSeedFromUint64(1, 2)
|
||||
root := core.RootPageID()
|
||||
h1 := HashPageID(seed, root)
|
||||
h2 := HashPageID(seed, root)
|
||||
assert.Equal(t, h1, h2, "same inputs should produce same hash")
|
||||
|
||||
// Different seed should produce different hash.
|
||||
seed2 := HashSeedFromUint64(3, 4)
|
||||
h3 := HashPageID(seed2, root)
|
||||
assert.NotEqual(t, h1, h3)
|
||||
}
|
||||
|
||||
// --- DB Integration Tests ---
|
||||
|
||||
func TestDBCreateAndOpen(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "test.bitbox")
|
||||
|
||||
seed := HashSeedFromUint64(1, 2)
|
||||
db, err := Create(path, 1024, seed)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, uint64(1024), db.Capacity())
|
||||
assert.Equal(t, int64(0), db.Occupied())
|
||||
require.NoError(t, db.Sync())
|
||||
require.NoError(t, db.Close())
|
||||
|
||||
db2, err := Open(path)
|
||||
require.NoError(t, err)
|
||||
defer db2.Close()
|
||||
assert.Equal(t, seed, db2.Seed())
|
||||
assert.Equal(t, uint64(1024), db2.Capacity())
|
||||
}
|
||||
|
||||
func TestDBStoreAndLoad(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "test.bitbox")
|
||||
|
||||
seed := HashSeedFromUint64(1, 2)
|
||||
db, err := Create(path, 1024, seed)
|
||||
require.NoError(t, err)
|
||||
defer db.Close()
|
||||
|
||||
// Store a page.
|
||||
rootID := core.RootPageID()
|
||||
page := new(core.RawPage)
|
||||
page.SetNodeAt(0, core.Node{0x42})
|
||||
|
||||
bucket, err := db.StorePage(rootID, page)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, int64(1), db.Occupied())
|
||||
|
||||
// Load it back.
|
||||
loaded, loadBucket, found, err := db.LoadPage(rootID)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, found)
|
||||
assert.Equal(t, bucket, loadBucket)
|
||||
assert.Equal(t, core.Node{0x42}, loaded.NodeAt(0))
|
||||
}
|
||||
|
||||
func TestDBStoreOverwrite(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "test.bitbox")
|
||||
|
||||
seed := HashSeedFromUint64(1, 2)
|
||||
db, err := Create(path, 1024, seed)
|
||||
require.NoError(t, err)
|
||||
defer db.Close()
|
||||
|
||||
rootID := core.RootPageID()
|
||||
page1 := new(core.RawPage)
|
||||
page1.SetNodeAt(0, core.Node{0x01})
|
||||
_, err = db.StorePage(rootID, page1)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Overwrite with new data.
|
||||
page2 := new(core.RawPage)
|
||||
page2.SetNodeAt(0, core.Node{0x02})
|
||||
_, err = db.StorePage(rootID, page2)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Should still only have 1 occupied.
|
||||
assert.Equal(t, int64(1), db.Occupied())
|
||||
|
||||
loaded, _, found, err := db.LoadPage(rootID)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, found)
|
||||
assert.Equal(t, core.Node{0x02}, loaded.NodeAt(0))
|
||||
}
|
||||
|
||||
func TestDBDelete(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "test.bitbox")
|
||||
|
||||
seed := HashSeedFromUint64(1, 2)
|
||||
db, err := Create(path, 1024, seed)
|
||||
require.NoError(t, err)
|
||||
defer db.Close()
|
||||
|
||||
rootID := core.RootPageID()
|
||||
page := new(core.RawPage)
|
||||
_, err = db.StorePage(rootID, page)
|
||||
require.NoError(t, err)
|
||||
|
||||
deleted, err := db.DeletePage(rootID)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, deleted)
|
||||
assert.Equal(t, int64(0), db.Occupied())
|
||||
|
||||
_, _, found, err := db.LoadPage(rootID)
|
||||
require.NoError(t, err)
|
||||
assert.False(t, found)
|
||||
}
|
||||
|
||||
func TestDBDeleteNonexistent(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "test.bitbox")
|
||||
|
||||
seed := HashSeedFromUint64(1, 2)
|
||||
db, err := Create(path, 1024, seed)
|
||||
require.NoError(t, err)
|
||||
defer db.Close()
|
||||
|
||||
rootID := core.RootPageID()
|
||||
deleted, err := db.DeletePage(rootID)
|
||||
require.NoError(t, err)
|
||||
assert.False(t, deleted)
|
||||
}
|
||||
|
||||
func TestDBLoadMiss(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "test.bitbox")
|
||||
|
||||
seed := HashSeedFromUint64(1, 2)
|
||||
db, err := Create(path, 1024, seed)
|
||||
require.NoError(t, err)
|
||||
defer db.Close()
|
||||
|
||||
rootID := core.RootPageID()
|
||||
_, _, found, err := db.LoadPage(rootID)
|
||||
require.NoError(t, err)
|
||||
assert.False(t, found)
|
||||
}
|
||||
|
||||
func TestDBMultiplePages(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "test.bitbox")
|
||||
|
||||
seed := HashSeedFromUint64(1, 2)
|
||||
db, err := Create(path, 1024, seed)
|
||||
require.NoError(t, err)
|
||||
defer db.Close()
|
||||
|
||||
rootID := core.RootPageID()
|
||||
childID, err := rootID.ChildPageID(0)
|
||||
require.NoError(t, err)
|
||||
childID2, err := rootID.ChildPageID(1)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Store 3 pages.
|
||||
for i, pid := range []core.PageID{rootID, childID, childID2} {
|
||||
page := new(core.RawPage)
|
||||
page.SetNodeAt(0, core.Node{byte(i + 1)})
|
||||
_, err := db.StorePage(pid, page)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
assert.Equal(t, int64(3), db.Occupied())
|
||||
|
||||
// Load each one.
|
||||
for i, pid := range []core.PageID{rootID, childID, childID2} {
|
||||
loaded, _, found, err := db.LoadPage(pid)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, found, "page %d", i)
|
||||
assert.Equal(t, core.Node{byte(i + 1)}, loaded.NodeAt(0))
|
||||
}
|
||||
}
|
||||
|
||||
func TestDBPersistAndReopen(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "test.bitbox")
|
||||
|
||||
seed := HashSeedFromUint64(1, 2)
|
||||
db, err := Create(path, 1024, seed)
|
||||
require.NoError(t, err)
|
||||
|
||||
rootID := core.RootPageID()
|
||||
page := new(core.RawPage)
|
||||
page.SetNodeAt(0, core.Node{0xAB})
|
||||
_, err = db.StorePage(rootID, page)
|
||||
require.NoError(t, err)
|
||||
|
||||
require.NoError(t, db.Sync())
|
||||
require.NoError(t, db.Close())
|
||||
|
||||
// Reopen and verify.
|
||||
db2, err := Open(path)
|
||||
require.NoError(t, err)
|
||||
defer db2.Close()
|
||||
|
||||
loaded, _, found, err := db2.LoadPage(rootID)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, found)
|
||||
assert.Equal(t, core.Node{0xAB}, loaded.NodeAt(0))
|
||||
}
|
||||
|
||||
func TestDBCapacityMustBePowerOf2(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "test.bitbox")
|
||||
seed := HashSeedFromUint64(1, 2)
|
||||
|
||||
_, err := Create(path, 1000, seed)
|
||||
assert.Error(t, err)
|
||||
// Cleanup any partial file.
|
||||
os.Remove(path)
|
||||
}
|
||||
|
||||
func TestDBDeleteAndReinsert(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "test.bitbox")
|
||||
|
||||
seed := HashSeedFromUint64(1, 2)
|
||||
db, err := Create(path, 1024, seed)
|
||||
require.NoError(t, err)
|
||||
defer db.Close()
|
||||
|
||||
rootID := core.RootPageID()
|
||||
|
||||
// Insert → delete → insert should work.
|
||||
page1 := new(core.RawPage)
|
||||
page1.SetNodeAt(0, core.Node{0x01})
|
||||
_, err = db.StorePage(rootID, page1)
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = db.DeletePage(rootID)
|
||||
require.NoError(t, err)
|
||||
|
||||
page2 := new(core.RawPage)
|
||||
page2.SetNodeAt(0, core.Node{0x02})
|
||||
_, err = db.StorePage(rootID, page2)
|
||||
require.NoError(t, err)
|
||||
|
||||
loaded, _, found, err := db.LoadPage(rootID)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, found)
|
||||
assert.Equal(t, core.Node{0x02}, loaded.NodeAt(0))
|
||||
}
|
||||
284
nomt/bitbox/db.go
Normal file
284
nomt/bitbox/db.go
Normal file
|
|
@ -0,0 +1,284 @@
|
|||
package bitbox
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"os"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/ethereum/go-ethereum/nomt/core"
|
||||
)
|
||||
|
||||
// DB is the Bitbox on-disk hash table for storing trie pages.
|
||||
type DB struct {
|
||||
file *os.File
|
||||
offsets HTOffsets
|
||||
metaMap *MetaMap
|
||||
seed [16]byte
|
||||
capacity uint64
|
||||
occupied atomic.Int64
|
||||
}
|
||||
|
||||
// Create creates a new Bitbox database at the given path.
|
||||
// Capacity must be a power of 2.
|
||||
func Create(path string, capacity uint64, seed [16]byte) (*DB, error) {
|
||||
if capacity == 0 || capacity&(capacity-1) != 0 {
|
||||
return nil, fmt.Errorf("bitbox: capacity must be a power of 2")
|
||||
}
|
||||
|
||||
f, offsets, err := CreateHTFile(path, capacity, seed)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
mm := NewMetaMap(capacity)
|
||||
|
||||
db := &DB{
|
||||
file: f,
|
||||
offsets: offsets,
|
||||
metaMap: mm,
|
||||
seed: seed,
|
||||
capacity: capacity,
|
||||
}
|
||||
return db, nil
|
||||
}
|
||||
|
||||
// Open opens an existing Bitbox database.
|
||||
func Open(path string) (*DB, error) {
|
||||
f, offsets, seed, occupied, err := OpenHTFile(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
mm, err := LoadMetaMap(f, offsets)
|
||||
if err != nil {
|
||||
f.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
db := &DB{
|
||||
file: f,
|
||||
offsets: offsets,
|
||||
metaMap: mm,
|
||||
seed: seed,
|
||||
capacity: offsets.Capacity,
|
||||
}
|
||||
db.occupied.Store(int64(occupied))
|
||||
return db, nil
|
||||
}
|
||||
|
||||
// Close closes the database file.
|
||||
func (db *DB) Close() error {
|
||||
return db.file.Close()
|
||||
}
|
||||
|
||||
// Seed returns the hash seed.
|
||||
func (db *DB) Seed() [16]byte {
|
||||
return db.seed
|
||||
}
|
||||
|
||||
// Capacity returns the total number of buckets.
|
||||
func (db *DB) Capacity() uint64 {
|
||||
return db.capacity
|
||||
}
|
||||
|
||||
// Occupied returns the number of occupied buckets.
|
||||
func (db *DB) Occupied() int64 {
|
||||
return db.occupied.Load()
|
||||
}
|
||||
|
||||
// LoadPage reads a page from the hash table by probing for its PageID.
|
||||
// Returns the page, the bucket index where it was found, and whether it exists.
|
||||
func (db *DB) LoadPage(pageID core.PageID) (
|
||||
*core.RawPage, uint64, bool, error,
|
||||
) {
|
||||
hash := HashPageID(db.seed, pageID)
|
||||
probe := NewProbeSequence(hash, db.capacity)
|
||||
encodedID := pageID.Encode()
|
||||
|
||||
for range db.capacity {
|
||||
bucket := probe.Bucket()
|
||||
meta := db.metaMap.Get(bucket)
|
||||
|
||||
if IsEmpty(meta) {
|
||||
// Definitely not in the table.
|
||||
return nil, 0, false, nil
|
||||
}
|
||||
|
||||
if IsTombstone(meta) {
|
||||
probe.Next()
|
||||
continue
|
||||
}
|
||||
|
||||
if !TagMatches(meta, hash) {
|
||||
probe.Next()
|
||||
continue
|
||||
}
|
||||
|
||||
// Tag matches — read the data page to confirm.
|
||||
page, err := db.readDataPage(bucket)
|
||||
if err != nil {
|
||||
return nil, 0, false, err
|
||||
}
|
||||
|
||||
storedID := page.PageIDBytes()
|
||||
if storedID == encodedID {
|
||||
return page, bucket, true, nil
|
||||
}
|
||||
|
||||
probe.Next()
|
||||
}
|
||||
|
||||
return nil, 0, false, nil
|
||||
}
|
||||
|
||||
// StorePage writes a page to the hash table. If the page already exists
|
||||
// (by probing), it is overwritten in-place. Otherwise, a new bucket is
|
||||
// allocated.
|
||||
func (db *DB) StorePage(pageID core.PageID, page *core.RawPage) (
|
||||
uint64, error,
|
||||
) {
|
||||
// Ensure the encoded PageID is in the page data.
|
||||
encodedID := pageID.Encode()
|
||||
page.SetPageIDBytes(encodedID)
|
||||
|
||||
hash := HashPageID(db.seed, pageID)
|
||||
probe := NewProbeSequence(hash, db.capacity)
|
||||
metaByte := MakeOccupied(hash)
|
||||
|
||||
var firstTombstone int64 = -1
|
||||
|
||||
for range db.capacity {
|
||||
bucket := probe.Bucket()
|
||||
meta := db.metaMap.Get(bucket)
|
||||
|
||||
if IsEmpty(meta) {
|
||||
// Use tombstone if we passed one, otherwise use this empty slot.
|
||||
target := bucket
|
||||
if firstTombstone >= 0 {
|
||||
target = uint64(firstTombstone)
|
||||
} else {
|
||||
db.occupied.Add(1)
|
||||
}
|
||||
db.metaMap.Set(target, metaByte)
|
||||
if err := db.writeDataPage(target, page); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return target, nil
|
||||
}
|
||||
|
||||
if IsTombstone(meta) {
|
||||
if firstTombstone < 0 {
|
||||
firstTombstone = int64(bucket)
|
||||
}
|
||||
probe.Next()
|
||||
continue
|
||||
}
|
||||
|
||||
if TagMatches(meta, hash) {
|
||||
// Check if this is the same page.
|
||||
existing, err := db.readDataPage(bucket)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if existing.PageIDBytes() == encodedID {
|
||||
// Overwrite in-place.
|
||||
if err := db.writeDataPage(bucket, page); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return bucket, nil
|
||||
}
|
||||
}
|
||||
|
||||
probe.Next()
|
||||
}
|
||||
|
||||
return 0, fmt.Errorf("bitbox: hash table full")
|
||||
}
|
||||
|
||||
// DeletePage removes a page from the hash table by setting its meta byte
|
||||
// to tombstone.
|
||||
func (db *DB) DeletePage(pageID core.PageID) (bool, error) {
|
||||
hash := HashPageID(db.seed, pageID)
|
||||
probe := NewProbeSequence(hash, db.capacity)
|
||||
encodedID := pageID.Encode()
|
||||
|
||||
for range db.capacity {
|
||||
bucket := probe.Bucket()
|
||||
meta := db.metaMap.Get(bucket)
|
||||
|
||||
if IsEmpty(meta) {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
if IsTombstone(meta) {
|
||||
probe.Next()
|
||||
continue
|
||||
}
|
||||
|
||||
if TagMatches(meta, hash) {
|
||||
existing, err := db.readDataPage(bucket)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if existing.PageIDBytes() == encodedID {
|
||||
db.metaMap.Set(bucket, MetaTombstone)
|
||||
db.occupied.Add(-1)
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
|
||||
probe.Next()
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// FlushMeta writes all dirty meta pages to disk and updates the header.
|
||||
func (db *DB) FlushMeta() error {
|
||||
for _, pageIdx := range db.metaMap.DirtyMetaPages() {
|
||||
if err := db.metaMap.WriteMetaPage(db.file, pageIdx); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
db.metaMap.ClearDirty()
|
||||
|
||||
// Update occupied count in header.
|
||||
var buf [8]byte
|
||||
occ := max(db.occupied.Load(), 0)
|
||||
binary.LittleEndian.PutUint64(buf[:], uint64(occ))
|
||||
if _, err := db.file.WriteAt(buf[:], occupiedOffset); err != nil {
|
||||
return fmt.Errorf("bitbox: update occupied count: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Sync flushes all pending data to disk.
|
||||
func (db *DB) Sync() error {
|
||||
if err := db.FlushMeta(); err != nil {
|
||||
return err
|
||||
}
|
||||
return db.file.Sync()
|
||||
}
|
||||
|
||||
// --- internal I/O ---
|
||||
|
||||
func (db *DB) readDataPage(bucket uint64) (*core.RawPage, error) {
|
||||
page := new(core.RawPage)
|
||||
offset := db.offsets.DataPageOffset(bucket)
|
||||
if _, err := db.file.ReadAt(page[:], offset); err != nil {
|
||||
return nil, fmt.Errorf("bitbox: read data page at bucket %d: %w",
|
||||
bucket, err)
|
||||
}
|
||||
return page, nil
|
||||
}
|
||||
|
||||
func (db *DB) writeDataPage(bucket uint64, page *core.RawPage) error {
|
||||
offset := db.offsets.DataPageOffset(bucket)
|
||||
if _, err := db.file.WriteAt(page[:], offset); err != nil {
|
||||
return fmt.Errorf("bitbox: write data page at bucket %d: %w",
|
||||
bucket, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
127
nomt/bitbox/htfile.go
Normal file
127
nomt/bitbox/htfile.go
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
// Package bitbox implements an on-disk open-addressing hash table that maps
|
||||
// PageIDs to 4096-byte pages. It is the storage backend for the NOMT trie.
|
||||
package bitbox
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/ethereum/go-ethereum/nomt/core"
|
||||
)
|
||||
|
||||
const (
|
||||
// pageSize is the size of a disk page.
|
||||
pageSize = core.PageSize // 4096
|
||||
|
||||
// metaBytesPerPage is the number of meta bytes that fit in one page.
|
||||
metaBytesPerPage = pageSize
|
||||
|
||||
// headerSize is the size of the HT file header in bytes.
|
||||
// Layout: [seed 16] [capacity 8] [occupied 8] = 32 bytes, padded to
|
||||
// one full page.
|
||||
headerSize = pageSize
|
||||
|
||||
// seedOffset is the offset of the 16-byte seed in the header.
|
||||
seedOffset = 0
|
||||
// capacityOffset is the offset of the 8-byte capacity in the header.
|
||||
capacityOffset = 16
|
||||
// occupiedOffset is the offset of the 8-byte occupied count.
|
||||
occupiedOffset = 24
|
||||
)
|
||||
|
||||
// HTOffsets describes the shape of a hash table file and converts bucket
// indices into byte offsets.
//
// The file is laid out as three consecutive regions:
//
//	[header: 1 page] [meta pages: ceil(capacity/4096)] [data pages: capacity * 4096]
type HTOffsets struct {
	// Capacity is the bucket count of the table.
	Capacity uint64
	// MetaPages is the number of pages in the meta region, i.e.
	// ceil(Capacity / 4096).
	MetaPages uint64
}
|
||||
|
||||
// NewHTOffsets creates an HTOffsets for the given capacity.
|
||||
func NewHTOffsets(capacity uint64) HTOffsets {
|
||||
return HTOffsets{
|
||||
Capacity: capacity,
|
||||
MetaPages: (capacity + metaBytesPerPage - 1) / metaBytesPerPage,
|
||||
}
|
||||
}
|
||||
|
||||
// MetaByteOffset returns the file offset for the meta byte of a given bucket.
|
||||
func (o *HTOffsets) MetaByteOffset(bucket uint64) int64 {
|
||||
return int64(headerSize) + int64(bucket)
|
||||
}
|
||||
|
||||
// DataPageOffset returns the file offset for the data page of a given bucket.
|
||||
func (o *HTOffsets) DataPageOffset(bucket uint64) int64 {
|
||||
dataStart := int64(headerSize) + int64(o.MetaPages)*pageSize
|
||||
return dataStart + int64(bucket)*pageSize
|
||||
}
|
||||
|
||||
// TotalFileSize returns the total size of the HT file in bytes.
|
||||
func (o *HTOffsets) TotalFileSize() int64 {
|
||||
return int64(headerSize) + int64(o.MetaPages)*pageSize +
|
||||
int64(o.Capacity)*pageSize
|
||||
}
|
||||
|
||||
// CreateHTFile creates a new hash table file with the given capacity and seed.
|
||||
// The file is pre-allocated to its full size.
|
||||
func CreateHTFile(path string, capacity uint64, seed [16]byte) (
|
||||
*os.File, HTOffsets, error,
|
||||
) {
|
||||
offsets := NewHTOffsets(capacity)
|
||||
|
||||
f, err := os.Create(path)
|
||||
if err != nil {
|
||||
return nil, offsets, fmt.Errorf("bitbox: create HT file: %w", err)
|
||||
}
|
||||
|
||||
// Pre-allocate.
|
||||
totalSize := offsets.TotalFileSize()
|
||||
if err := f.Truncate(totalSize); err != nil {
|
||||
f.Close()
|
||||
return nil, offsets, fmt.Errorf("bitbox: truncate HT file: %w", err)
|
||||
}
|
||||
|
||||
// Write header.
|
||||
var header [headerSize]byte
|
||||
copy(header[seedOffset:], seed[:])
|
||||
binary.LittleEndian.PutUint64(header[capacityOffset:], capacity)
|
||||
binary.LittleEndian.PutUint64(header[occupiedOffset:], 0)
|
||||
|
||||
if _, err := f.WriteAt(header[:], 0); err != nil {
|
||||
f.Close()
|
||||
return nil, offsets, fmt.Errorf("bitbox: write header: %w", err)
|
||||
}
|
||||
|
||||
return f, offsets, nil
|
||||
}
|
||||
|
||||
// OpenHTFile opens an existing hash table file and reads its header.
|
||||
func OpenHTFile(path string) (
|
||||
*os.File, HTOffsets, [16]byte, uint64, error,
|
||||
) {
|
||||
f, err := os.OpenFile(path, os.O_RDWR, 0)
|
||||
if err != nil {
|
||||
return nil, HTOffsets{}, [16]byte{}, 0,
|
||||
fmt.Errorf("bitbox: open HT file: %w", err)
|
||||
}
|
||||
|
||||
var header [headerSize]byte
|
||||
if _, err := f.ReadAt(header[:], 0); err != nil {
|
||||
f.Close()
|
||||
return nil, HTOffsets{}, [16]byte{}, 0,
|
||||
fmt.Errorf("bitbox: read header: %w", err)
|
||||
}
|
||||
|
||||
var seed [16]byte
|
||||
copy(seed[:], header[seedOffset:seedOffset+16])
|
||||
capacity := binary.LittleEndian.Uint64(header[capacityOffset:])
|
||||
occupied := binary.LittleEndian.Uint64(header[occupiedOffset:])
|
||||
offsets := NewHTOffsets(capacity)
|
||||
|
||||
return f, offsets, seed, occupied, nil
|
||||
}
|
||||
118
nomt/bitbox/metamap.go
Normal file
118
nomt/bitbox/metamap.go
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
package bitbox
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
)
|
||||
|
||||
// Reserved meta byte values. Both have bit 7 clear, which distinguishes them
// from occupied meta bytes.
const (
	// MetaEmpty is the meta byte of a bucket that holds no entry.
	MetaEmpty byte = 0x00
	// MetaTombstone is the meta byte of a bucket whose entry was deleted;
	// lookups continue probing past it.
	MetaTombstone byte = 0x7F
)
|
||||
|
||||
// IsOccupied reports whether b marks a bucket that holds a live entry, which
// is the case exactly when its top bit is set (b >= 0x80).
func IsOccupied(b byte) bool {
	return b >= 0x80
}
|
||||
|
||||
// IsEmpty reports whether a meta byte indicates an empty bucket.
|
||||
func IsEmpty(b byte) bool {
|
||||
return b == MetaEmpty
|
||||
}
|
||||
|
||||
// IsTombstone reports whether a meta byte indicates a tombstone.
|
||||
func IsTombstone(b byte) bool {
|
||||
return b == MetaTombstone
|
||||
}
|
||||
|
||||
// MakeOccupied builds the meta byte for an occupied bucket: bit 7 is set and
// the low seven bits carry the top seven bits of hash as a filter tag.
func MakeOccupied(hash uint64) byte {
	tag := byte(hash >> 57)
	return tag | 0x80
}
|
||||
|
||||
// TagMatches reports whether an occupied meta byte could match a given hash.
|
||||
func TagMatches(metaByte byte, hash uint64) bool {
|
||||
return IsOccupied(metaByte) && metaByte == MakeOccupied(hash)
|
||||
}
|
||||
|
||||
// MetaMap keeps the meta byte of every bucket in memory and remembers which
// meta pages were modified so that only those need to be written back.
type MetaMap struct {
	// data holds one meta byte per bucket, indexed by bucket number.
	data []byte
	// dirty holds one flag per meta page; true means the page was modified.
	dirty []bool
}
|
||||
|
||||
// NewMetaMap creates a MetaMap for the given capacity with all empty buckets.
|
||||
func NewMetaMap(capacity uint64) *MetaMap {
|
||||
metaPages := (capacity + metaBytesPerPage - 1) / metaBytesPerPage
|
||||
return &MetaMap{
|
||||
data: make([]byte, capacity),
|
||||
dirty: make([]bool, metaPages),
|
||||
}
|
||||
}
|
||||
|
||||
// LoadMetaMap reads all meta bytes from the HT file into memory.
|
||||
func LoadMetaMap(f *os.File, offsets HTOffsets) (*MetaMap, error) {
|
||||
mm := NewMetaMap(offsets.Capacity)
|
||||
|
||||
// Read all meta bytes at once.
|
||||
metaRegionSize := int64(offsets.MetaPages) * pageSize
|
||||
buf := make([]byte, metaRegionSize)
|
||||
if _, err := f.ReadAt(buf, int64(headerSize)); err != nil {
|
||||
return nil, fmt.Errorf("bitbox: load meta map: %w", err)
|
||||
}
|
||||
|
||||
// Copy only the capacity-many bytes (the rest is padding).
|
||||
copy(mm.data, buf[:offsets.Capacity])
|
||||
return mm, nil
|
||||
}
|
||||
|
||||
// Get returns the meta byte for a bucket.
|
||||
func (m *MetaMap) Get(bucket uint64) byte {
|
||||
return m.data[bucket]
|
||||
}
|
||||
|
||||
// Set writes a meta byte for a bucket and marks the containing page dirty.
|
||||
func (m *MetaMap) Set(bucket uint64, value byte) {
|
||||
m.data[bucket] = value
|
||||
m.dirty[bucket/metaBytesPerPage] = true
|
||||
}
|
||||
|
||||
// DirtyMetaPages returns the indices of meta pages that have been modified
|
||||
// since the last call to ClearDirty.
|
||||
func (m *MetaMap) DirtyMetaPages() []uint64 {
|
||||
pages := make([]uint64, 0, len(m.dirty))
|
||||
for i, d := range m.dirty {
|
||||
if d {
|
||||
pages = append(pages, uint64(i))
|
||||
}
|
||||
}
|
||||
return pages
|
||||
}
|
||||
|
||||
// ClearDirty resets all dirty flags.
|
||||
func (m *MetaMap) ClearDirty() {
|
||||
for i := range m.dirty {
|
||||
m.dirty[i] = false
|
||||
}
|
||||
}
|
||||
|
||||
// WriteMetaPage writes a single meta page (identified by index) to the file.
|
||||
func (m *MetaMap) WriteMetaPage(
|
||||
f *os.File, pageIdx uint64,
|
||||
) error {
|
||||
var buf [pageSize]byte
|
||||
start := pageIdx * metaBytesPerPage
|
||||
end := min(start+metaBytesPerPage, uint64(len(m.data)))
|
||||
copy(buf[:], m.data[start:end])
|
||||
|
||||
offset := int64(headerSize) + int64(pageIdx)*pageSize
|
||||
if _, err := f.WriteAt(buf[:], offset); err != nil {
|
||||
return fmt.Errorf("bitbox: write meta page %d: %w", pageIdx, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
81
nomt/bitbox/probe.go
Normal file
81
nomt/bitbox/probe.go
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
package bitbox
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
|
||||
"github.com/cespare/xxhash/v2"
|
||||
"github.com/ethereum/go-ethereum/nomt/core"
|
||||
)
|
||||
|
||||
// HashPageID computes the xxhash64 of seed||encodedPageID.
|
||||
func HashPageID(seed [16]byte, pageID core.PageID) uint64 {
|
||||
encoded := pageID.Encode()
|
||||
var buf [48]byte
|
||||
copy(buf[:16], seed[:])
|
||||
copy(buf[16:], encoded[:])
|
||||
return xxhash.Sum64(buf[:])
|
||||
}
|
||||
|
||||
// HashPageIDBytes computes the xxhash64 from seed and raw encoded page ID.
|
||||
func HashPageIDBytes(seed [16]byte, encodedPageID [32]byte) uint64 {
|
||||
var buf [48]byte
|
||||
copy(buf[:16], seed[:])
|
||||
copy(buf[16:], encodedPageID[:])
|
||||
return xxhash.Sum64(buf[:])
|
||||
}
|
||||
|
||||
// HashSeedFromBytes turns b into a 16-byte seed. Input shorter than 16 bytes
// is padded with zero bytes on the right; input longer than 16 bytes is
// truncated.
func HashSeedFromBytes(b []byte) [16]byte {
	var out [16]byte
	for i := 0; i < len(out) && i < len(b); i++ {
		out[i] = b[i]
	}
	return out
}
|
||||
|
||||
// HashSeedFromUint64 builds a 16-byte seed from two integers: a fills bytes
// 0-7 and b fills bytes 8-15, both in little-endian order.
func HashSeedFromUint64(a, b uint64) [16]byte {
	var out [16]byte
	for i, word := range [2]uint64{a, b} {
		binary.LittleEndian.PutUint64(out[8*i:8*i+8], word)
	}
	return out
}
|
||||
|
||||
// ProbeSequence walks the buckets of the hash table in triangular order: the
// k-th advance moves k buckets forward, so after `step` advances the bucket is
//
//	(initial + step*(step+1)/2) mod capacity
//
// For a power-of-2 capacity this order reaches every bucket once before it
// repeats, which is what bounds the probe loops.
type ProbeSequence struct {
	// hash is the hash the sequence was started from.
	hash uint64
	// bucket is the current position in the table.
	bucket uint64
	// step is the number of advances made so far.
	step uint64
	// capacity is the bucket count of the table.
	capacity uint64
}
|
||||
|
||||
// NewProbeSequence creates a new probe sequence for the given hash and
|
||||
// capacity. The capacity MUST be a power of 2.
|
||||
func NewProbeSequence(hash, capacity uint64) ProbeSequence {
|
||||
initial := hash % capacity
|
||||
return ProbeSequence{
|
||||
hash: hash,
|
||||
bucket: initial,
|
||||
step: 0,
|
||||
capacity: capacity,
|
||||
}
|
||||
}
|
||||
|
||||
// Bucket returns the current bucket index.
|
||||
func (p *ProbeSequence) Bucket() uint64 {
|
||||
return p.bucket
|
||||
}
|
||||
|
||||
// Hash returns the hash used to seed this probe.
|
||||
func (p *ProbeSequence) Hash() uint64 {
|
||||
return p.hash
|
||||
}
|
||||
|
||||
// Next advances to the next bucket in the triangular probe sequence.
|
||||
func (p *ProbeSequence) Next() {
|
||||
p.step++
|
||||
p.bucket = (p.bucket + p.step) % p.capacity
|
||||
}
|
||||
Loading…
Reference in a new issue