nomt/bitbox: add Phase 3 Bitbox on-disk hash table storage

Implement the on-disk open-addressing hash table for storing trie pages:
- htfile.go: HT file layout with header, meta pages, and data pages
- metamap.go: in-memory meta byte map with dirty page tracking
- probe.go: triangular probing with xxhash64 page ID hashing
- db.go: Bitbox DB with StorePage, LoadPage, DeletePage, FlushMeta, Sync

The hash table uses 1-byte meta tags (top 7 bits of hash) for fast
filtering before reading full 4096-byte data pages. Triangular probing
with power-of-2 capacity guarantees all buckets are visited.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
weiihann 2026-02-12 17:14:20 +08:00
parent 88fd10529f
commit fef1ed4c4f
5 changed files with 1009 additions and 0 deletions

399
nomt/bitbox/bitbox_test.go Normal file
View file

@ -0,0 +1,399 @@
package bitbox
import (
"os"
"path/filepath"
"testing"
"github.com/ethereum/go-ethereum/nomt/core"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// --- HT File Layout Tests ---
// TestHTOffsetsMetaByteOffset checks that meta bytes are laid out
// contiguously, starting right after the one-page header.
func TestHTOffsetsMetaByteOffset(t *testing.T) {
	layout := NewHTOffsets(8192)
	wantByBucket := []int64{int64(pageSize), int64(pageSize + 1)}
	for bucket, want := range wantByBucket {
		assert.Equal(t, want, layout.MetaByteOffset(uint64(bucket)))
	}
}
// TestHTOffsetsDataPageOffset checks where the data region begins and that
// consecutive buckets are one page apart.
func TestHTOffsetsDataPageOffset(t *testing.T) {
	// 4096 buckets need exactly one meta page.
	layout := NewHTOffsets(4096)
	assert.Equal(t, uint64(1), layout.MetaPages)

	// header (4096) + one meta page (4096) puts the first data page at 8192.
	const firstDataPage = int64(8192)
	assert.Equal(t, firstDataPage, layout.DataPageOffset(0))
	assert.Equal(t, firstDataPage+4096, layout.DataPageOffset(1))
}
// TestHTOffsetsTotalFileSize checks the pre-allocated file size for a
// 4096-bucket table.
func TestHTOffsetsTotalFileSize(t *testing.T) {
	const (
		headerBytes = 4096
		metaBytes   = 4096        // one meta page
		dataBytes   = 4096 * 4096 // 4096 data pages of 4096 bytes each
	)
	layout := NewHTOffsets(4096)
	want := int64(headerBytes + metaBytes + dataBytes)
	assert.Equal(t, want, layout.TotalFileSize())
}
// TestHTOffsetsMetaPagesRoundup checks that the meta page count is rounded
// up when the capacity is not a multiple of the page size.
func TestHTOffsetsMetaPagesRoundup(t *testing.T) {
	// 5000 meta bytes do not fit in a single 4096-byte page.
	layout := NewHTOffsets(5000)
	got := layout.MetaPages
	assert.Equal(t, uint64(2), got)
}
// TestCreateOpenHTFile creates an HT file, reopens it, and checks that the
// header round-trips the seed, the capacity and a zero occupied count.
func TestCreateOpenHTFile(t *testing.T) {
	const buckets = uint64(1024)
	htPath := filepath.Join(t.TempDir(), "test.ht")
	wantSeed := HashSeedFromUint64(42, 99)

	created, createdLayout, err := CreateHTFile(htPath, buckets, wantSeed)
	require.NoError(t, err)
	assert.Equal(t, buckets, createdLayout.Capacity)
	created.Close()

	reopened, reopenedLayout, gotSeed, occupied, err := OpenHTFile(htPath)
	require.NoError(t, err)
	defer reopened.Close()

	assert.Equal(t, wantSeed, gotSeed)
	assert.Equal(t, buckets, reopenedLayout.Capacity)
	assert.Equal(t, uint64(0), occupied)
}
// --- Meta Byte Tests ---
func TestMetaByteEncoding(t *testing.T) {
assert.True(t, IsEmpty(MetaEmpty))
assert.False(t, IsOccupied(MetaEmpty))
assert.False(t, IsTombstone(MetaEmpty))
assert.True(t, IsTombstone(MetaTombstone))
assert.False(t, IsEmpty(MetaTombstone))
assert.False(t, IsOccupied(MetaTombstone))
occupied := MakeOccupied(0xFFFFFFFFFFFFFFFF)
assert.True(t, IsOccupied(occupied))
assert.False(t, IsEmpty(occupied))
assert.False(t, IsTombstone(occupied))
}
// TestMetaByteTagMatching checks that a tag matches the hash it was built
// from and rejects a hash whose high bits differ.
func TestMetaByteTagMatching(t *testing.T) {
	const (
		storedHash = uint64(0xABCDEF1234567890)
		// Same low bytes, different top bytes: the tag must differ.
		otherHash = uint64(0x1234EF1234567890)
	)
	tag := MakeOccupied(storedHash)
	assert.True(t, TagMatches(tag, storedHash))
	assert.False(t, TagMatches(tag, otherHash))
}
// TestMetaMapSetGet checks that a fresh map is empty and that Set is
// visible through Get.
func TestMetaMapSetGet(t *testing.T) {
	metas := NewMetaMap(8192)
	assert.Equal(t, MetaEmpty, metas.Get(0))

	const bucket = 100
	metas.Set(bucket, MakeOccupied(12345))
	stored := metas.Get(bucket)
	assert.True(t, IsOccupied(stored))
}
// TestMetaMapDirtyTracking checks that Set marks the containing meta page
// dirty and that ClearDirty resets the tracking.
func TestMetaMapDirtyTracking(t *testing.T) {
	metas := NewMetaMap(8192) // spans two meta pages
	assert.Empty(t, metas.DirtyMetaPages())

	// Bucket 0 lives in meta page 0, bucket 5000 in meta page 1.
	for _, bucket := range []uint64{0, 5000} {
		metas.Set(bucket, MetaTombstone)
	}

	dirtyPages := metas.DirtyMetaPages()
	assert.Len(t, dirtyPages, 2)
	for _, pageIdx := range []uint64{0, 1} {
		assert.Contains(t, dirtyPages, pageIdx)
	}

	metas.ClearDirty()
	assert.Empty(t, metas.DirtyMetaPages())
}
// --- Probe Sequence Tests ---
// TestProbeSequenceInitial checks the starting bucket and that the hash is
// retained by the sequence.
func TestProbeSequenceInitial(t *testing.T) {
	const (
		hash    = uint64(42)
		buckets = uint64(1024)
	)
	seq := NewProbeSequence(hash, buckets)
	assert.Equal(t, hash%buckets, seq.Bucket())
	assert.Equal(t, hash, seq.Hash())
}
// TestProbeSequenceTriangular checks the first few buckets of a sequence
// starting at bucket 0 in a 16-bucket table.
func TestProbeSequenceTriangular(t *testing.T) {
	seq := NewProbeSequence(0, 16)

	// Each step k adds k to the previous bucket: 0, +1, +2, +3, +4 (mod 16).
	wantBuckets := []uint64{0, 1, 3, 6, 10}
	for i, want := range wantBuckets {
		if i > 0 {
			seq.Next()
		}
		assert.Equal(t, want, seq.Bucket())
	}
}
// TestProbeSequenceVisitsAll checks that, with a power-of-2 capacity, the
// first `capacity` probes touch every bucket.
func TestProbeSequenceVisitsAll(t *testing.T) {
	const buckets = uint64(16)
	seq := NewProbeSequence(0, buckets)

	seen := make(map[uint64]struct{}, buckets)
	for i := uint64(0); i < buckets; i++ {
		seen[seq.Bucket()] = struct{}{}
		seq.Next()
	}

	assert.Equal(t, int(buckets), len(seen),
		"triangular probing should visit all buckets")
}
func TestHashPageID(t *testing.T) {
seed := HashSeedFromUint64(1, 2)
root := core.RootPageID()
h1 := HashPageID(seed, root)
h2 := HashPageID(seed, root)
assert.Equal(t, h1, h2, "same inputs should produce same hash")
// Different seed should produce different hash.
seed2 := HashSeedFromUint64(3, 4)
h3 := HashPageID(seed2, root)
assert.NotEqual(t, h1, h3)
}
// --- DB Integration Tests ---
// TestDBCreateAndOpen creates an empty database, closes it, and checks that
// reopening yields the same seed and capacity.
func TestDBCreateAndOpen(t *testing.T) {
	const buckets = uint64(1024)
	dbPath := filepath.Join(t.TempDir(), "test.bitbox")
	wantSeed := HashSeedFromUint64(1, 2)

	created, err := Create(dbPath, buckets, wantSeed)
	require.NoError(t, err)
	assert.Equal(t, buckets, created.Capacity())
	assert.Equal(t, int64(0), created.Occupied())
	require.NoError(t, created.Sync())
	require.NoError(t, created.Close())

	reopened, err := Open(dbPath)
	require.NoError(t, err)
	defer reopened.Close()

	assert.Equal(t, wantSeed, reopened.Seed())
	assert.Equal(t, buckets, reopened.Capacity())
}
func TestDBStoreAndLoad(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "test.bitbox")
seed := HashSeedFromUint64(1, 2)
db, err := Create(path, 1024, seed)
require.NoError(t, err)
defer db.Close()
// Store a page.
rootID := core.RootPageID()
page := new(core.RawPage)
page.SetNodeAt(0, core.Node{0x42})
bucket, err := db.StorePage(rootID, page)
require.NoError(t, err)
assert.Equal(t, int64(1), db.Occupied())
// Load it back.
loaded, loadBucket, found, err := db.LoadPage(rootID)
require.NoError(t, err)
assert.True(t, found)
assert.Equal(t, bucket, loadBucket)
assert.Equal(t, core.Node{0x42}, loaded.NodeAt(0))
}
func TestDBStoreOverwrite(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "test.bitbox")
seed := HashSeedFromUint64(1, 2)
db, err := Create(path, 1024, seed)
require.NoError(t, err)
defer db.Close()
rootID := core.RootPageID()
page1 := new(core.RawPage)
page1.SetNodeAt(0, core.Node{0x01})
_, err = db.StorePage(rootID, page1)
require.NoError(t, err)
// Overwrite with new data.
page2 := new(core.RawPage)
page2.SetNodeAt(0, core.Node{0x02})
_, err = db.StorePage(rootID, page2)
require.NoError(t, err)
// Should still only have 1 occupied.
assert.Equal(t, int64(1), db.Occupied())
loaded, _, found, err := db.LoadPage(rootID)
require.NoError(t, err)
assert.True(t, found)
assert.Equal(t, core.Node{0x02}, loaded.NodeAt(0))
}
func TestDBDelete(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "test.bitbox")
seed := HashSeedFromUint64(1, 2)
db, err := Create(path, 1024, seed)
require.NoError(t, err)
defer db.Close()
rootID := core.RootPageID()
page := new(core.RawPage)
_, err = db.StorePage(rootID, page)
require.NoError(t, err)
deleted, err := db.DeletePage(rootID)
require.NoError(t, err)
assert.True(t, deleted)
assert.Equal(t, int64(0), db.Occupied())
_, _, found, err := db.LoadPage(rootID)
require.NoError(t, err)
assert.False(t, found)
}
func TestDBDeleteNonexistent(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "test.bitbox")
seed := HashSeedFromUint64(1, 2)
db, err := Create(path, 1024, seed)
require.NoError(t, err)
defer db.Close()
rootID := core.RootPageID()
deleted, err := db.DeletePage(rootID)
require.NoError(t, err)
assert.False(t, deleted)
}
func TestDBLoadMiss(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "test.bitbox")
seed := HashSeedFromUint64(1, 2)
db, err := Create(path, 1024, seed)
require.NoError(t, err)
defer db.Close()
rootID := core.RootPageID()
_, _, found, err := db.LoadPage(rootID)
require.NoError(t, err)
assert.False(t, found)
}
func TestDBMultiplePages(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "test.bitbox")
seed := HashSeedFromUint64(1, 2)
db, err := Create(path, 1024, seed)
require.NoError(t, err)
defer db.Close()
rootID := core.RootPageID()
childID, err := rootID.ChildPageID(0)
require.NoError(t, err)
childID2, err := rootID.ChildPageID(1)
require.NoError(t, err)
// Store 3 pages.
for i, pid := range []core.PageID{rootID, childID, childID2} {
page := new(core.RawPage)
page.SetNodeAt(0, core.Node{byte(i + 1)})
_, err := db.StorePage(pid, page)
require.NoError(t, err)
}
assert.Equal(t, int64(3), db.Occupied())
// Load each one.
for i, pid := range []core.PageID{rootID, childID, childID2} {
loaded, _, found, err := db.LoadPage(pid)
require.NoError(t, err)
assert.True(t, found, "page %d", i)
assert.Equal(t, core.Node{byte(i + 1)}, loaded.NodeAt(0))
}
}
func TestDBPersistAndReopen(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "test.bitbox")
seed := HashSeedFromUint64(1, 2)
db, err := Create(path, 1024, seed)
require.NoError(t, err)
rootID := core.RootPageID()
page := new(core.RawPage)
page.SetNodeAt(0, core.Node{0xAB})
_, err = db.StorePage(rootID, page)
require.NoError(t, err)
require.NoError(t, db.Sync())
require.NoError(t, db.Close())
// Reopen and verify.
db2, err := Open(path)
require.NoError(t, err)
defer db2.Close()
loaded, _, found, err := db2.LoadPage(rootID)
require.NoError(t, err)
assert.True(t, found)
assert.Equal(t, core.Node{0xAB}, loaded.NodeAt(0))
}
func TestDBCapacityMustBePowerOf2(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "test.bitbox")
seed := HashSeedFromUint64(1, 2)
_, err := Create(path, 1000, seed)
assert.Error(t, err)
// Cleanup any partial file.
os.Remove(path)
}
func TestDBDeleteAndReinsert(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "test.bitbox")
seed := HashSeedFromUint64(1, 2)
db, err := Create(path, 1024, seed)
require.NoError(t, err)
defer db.Close()
rootID := core.RootPageID()
// Insert → delete → insert should work.
page1 := new(core.RawPage)
page1.SetNodeAt(0, core.Node{0x01})
_, err = db.StorePage(rootID, page1)
require.NoError(t, err)
_, err = db.DeletePage(rootID)
require.NoError(t, err)
page2 := new(core.RawPage)
page2.SetNodeAt(0, core.Node{0x02})
_, err = db.StorePage(rootID, page2)
require.NoError(t, err)
loaded, _, found, err := db.LoadPage(rootID)
require.NoError(t, err)
assert.True(t, found)
assert.Equal(t, core.Node{0x02}, loaded.NodeAt(0))
}

284
nomt/bitbox/db.go Normal file
View file

@ -0,0 +1,284 @@
package bitbox
import (
"encoding/binary"
"fmt"
"os"
"sync/atomic"
"github.com/ethereum/go-ethereum/nomt/core"
)
// DB is the Bitbox on-disk hash table for storing trie pages.
//
// It pairs the open HT file with an in-memory copy of the table's meta
// bytes. Probing consults the in-memory meta bytes first and only reads a
// data page from the file when a bucket's tag matches.
type DB struct {
	// file is the open HT file holding header, meta pages and data pages.
	file *os.File
	// offsets describes the file layout for this table's capacity.
	offsets HTOffsets
	// metaMap is the in-memory copy of all meta bytes, with dirty tracking.
	metaMap *MetaMap
	// seed is mixed into every PageID hash.
	seed [16]byte
	// capacity is the number of buckets; Create requires a power of 2.
	capacity uint64
	// occupied counts buckets currently holding a page.
	occupied atomic.Int64
}
// Create makes a new Bitbox database file at path and returns a handle to
// it. The table starts with every bucket empty.
//
// capacity is the number of buckets and must be a non-zero power of 2;
// any other value is rejected before the file is touched. seed is stored
// in the file header and mixed into every PageID hash.
func Create(path string, capacity uint64, seed [16]byte) (*DB, error) {
	// A power of two has a single bit set, so clearing its lowest set bit
	// leaves zero.
	isPowerOfTwo := capacity != 0 && capacity&(capacity-1) == 0
	if !isPowerOfTwo {
		return nil, fmt.Errorf("bitbox: capacity must be a power of 2")
	}

	file, layout, err := CreateHTFile(path, capacity, seed)
	if err != nil {
		return nil, err
	}

	return &DB{
		file:     file,
		offsets:  layout,
		metaMap:  NewMetaMap(capacity),
		seed:     seed,
		capacity: capacity,
	}, nil
}
// Open opens an existing Bitbox database at path.
//
// The header is validated before it is trusted: the capacity must be a
// non-zero power of 2 (the same rule Create enforces, and the condition
// under which probing visits every bucket), the recorded occupied count
// must not exceed the capacity, and the file must be at least as large as
// the layout implied by the capacity. On any failure the file is closed
// and an error is returned.
func Open(path string) (*DB, error) {
	f, offsets, seed, occupied, err := OpenHTFile(path)
	if err != nil {
		return nil, err
	}
	if err := validateHTHeader(f, offsets, occupied); err != nil {
		f.Close()
		return nil, err
	}
	mm, err := LoadMetaMap(f, offsets)
	if err != nil {
		f.Close()
		return nil, err
	}
	db := &DB{
		file:     f,
		offsets:  offsets,
		metaMap:  mm,
		seed:     seed,
		capacity: offsets.Capacity,
	}
	db.occupied.Store(int64(occupied))
	return db, nil
}

// validateHTHeader checks that the header values read from f describe a
// table this package can safely operate on.
func validateHTHeader(f *os.File, offsets HTOffsets, occupied uint64) error {
	capacity := offsets.Capacity
	// A zero capacity would make the probe's modulo divide by zero.
	if capacity == 0 || capacity&(capacity-1) != 0 {
		return fmt.Errorf(
			"bitbox: invalid capacity %d in header: must be a power of 2",
			capacity)
	}
	if occupied > capacity {
		return fmt.Errorf(
			"bitbox: occupied count %d exceeds capacity %d",
			occupied, capacity)
	}
	info, err := f.Stat()
	if err != nil {
		return fmt.Errorf("bitbox: stat HT file: %w", err)
	}
	// want <= 0 means the size computation overflowed int64 for an
	// implausibly large capacity.
	if want := offsets.TotalFileSize(); want <= 0 || info.Size() < want {
		return fmt.Errorf(
			"bitbox: HT file is %d bytes, header requires %d",
			info.Size(), want)
	}
	return nil
}
// Close releases the underlying file handle and returns any error from
// closing it. It does not write dirty meta pages; use Sync for that.
func (db *DB) Close() error {
	err := db.file.Close()
	return err
}
// Seed reports the 16-byte seed that is mixed into every PageID hash for
// this database.
func (db *DB) Seed() [16]byte {
	seed := db.seed
	return seed
}
// Capacity reports how many buckets the hash table has in total,
// regardless of how many are in use.
func (db *DB) Capacity() uint64 {
	buckets := db.capacity
	return buckets
}
// Occupied reports the current value of the occupied-bucket counter,
// read atomically.
func (db *DB) Occupied() int64 {
	inUse := db.occupied.Load()
	return inUse
}
// LoadPage looks up pageID by walking its probe sequence.
//
// It returns the page, the bucket it was found in, and true on a hit. On
// a miss it returns (nil, 0, false, nil). An error is returned only when
// reading a candidate data page from the file fails.
func (db *DB) LoadPage(pageID core.PageID) (
	*core.RawPage, uint64, bool, error,
) {
	hash := HashPageID(db.seed, pageID)
	probe := NewProbeSequence(hash, db.capacity)
	wantID := pageID.Encode()

	// At most one probe per bucket.
	for tries := uint64(0); tries < db.capacity; tries++ {
		bucket := probe.Bucket()
		switch meta := db.metaMap.Get(bucket); {
		case IsEmpty(meta):
			// An empty bucket ends the probe chain: the page is absent.
			return nil, 0, false, nil
		case IsTombstone(meta), !TagMatches(meta, hash):
			// Deleted entry or a different tag: keep probing.
		default:
			// The tag only narrows the search; the PageID stored in the
			// data page decides.
			candidate, err := db.readDataPage(bucket)
			if err != nil {
				return nil, 0, false, err
			}
			if candidate.PageIDBytes() == wantID {
				return candidate, bucket, true, nil
			}
		}
		probe.Next()
	}
	return nil, 0, false, nil
}
// StorePage writes page under pageID and returns the bucket it was stored
// in. The encoded PageID is written into page before it is stored, so the
// caller's page is modified.
//
// If pageID is already present it is overwritten in place and the
// occupied count is unchanged. Otherwise the page goes into the first
// tombstone met along the probe sequence, or failing that the first empty
// bucket, and the occupied count is incremented. A "hash table full"
// error is returned only when the probe sequence holds neither a
// tombstone nor an empty bucket.
//
// For a new entry the data page is written before the meta byte and the
// counter are updated, so a failed write leaves the bucket marked free.
func (db *DB) StorePage(pageID core.PageID, page *core.RawPage) (
	uint64, error,
) {
	// Ensure the encoded PageID is in the page data.
	encodedID := pageID.Encode()
	page.SetPageIDBytes(encodedID)

	hash := HashPageID(db.seed, pageID)
	probe := NewProbeSequence(hash, db.capacity)

	var (
		freeBucket uint64 // first reusable bucket seen while probing
		haveFree   bool
	)

probing:
	for range db.capacity {
		bucket := probe.Bucket()
		meta := db.metaMap.Get(bucket)
		switch {
		case IsEmpty(meta):
			// An empty bucket ends the chain, so the page is not stored
			// yet. Prefer an earlier tombstone to keep chains short.
			if !haveFree {
				freeBucket, haveFree = bucket, true
			}
			break probing
		case IsTombstone(meta):
			// Remember the slot but keep probing: the page may still
			// exist further along the chain.
			if !haveFree {
				freeBucket, haveFree = bucket, true
			}
		case TagMatches(meta, hash):
			// Check if this is the same page.
			existing, err := db.readDataPage(bucket)
			if err != nil {
				return 0, err
			}
			if existing.PageIDBytes() == encodedID {
				// Overwrite in place; occupancy does not change.
				if err := db.writeDataPage(bucket, page); err != nil {
					return 0, err
				}
				return bucket, nil
			}
		}
		probe.Next()
	}

	if !haveFree {
		return 0, fmt.Errorf("bitbox: hash table full")
	}
	if err := db.writeDataPage(freeBucket, page); err != nil {
		return 0, err
	}
	db.metaMap.Set(freeBucket, MakeOccupied(hash))
	// Both empty and tombstone buckets were uncounted: DeletePage
	// decrements the counter when it leaves a tombstone behind.
	db.occupied.Add(1)
	return freeBucket, nil
}
// DeletePage removes pageID from the table by replacing its meta byte
// with a tombstone and decrementing the occupied count; the data page
// itself is left untouched.
//
// It returns true when the page was found and removed, false when it was
// not present. An error is returned only when reading a candidate data
// page fails.
func (db *DB) DeletePage(pageID core.PageID) (bool, error) {
	hash := HashPageID(db.seed, pageID)
	probe := NewProbeSequence(hash, db.capacity)
	wantID := pageID.Encode()

	for tries := uint64(0); tries < db.capacity; tries++ {
		bucket := probe.Bucket()
		meta := db.metaMap.Get(bucket)

		// An empty bucket ends the probe chain: nothing to delete.
		if IsEmpty(meta) {
			return false, nil
		}

		if !IsTombstone(meta) && TagMatches(meta, hash) {
			// Confirm the match against the PageID in the data page.
			stored, err := db.readDataPage(bucket)
			if err != nil {
				return false, err
			}
			if stored.PageIDBytes() == wantID {
				db.metaMap.Set(bucket, MetaTombstone)
				db.occupied.Add(-1)
				return true, nil
			}
		}
		probe.Next()
	}
	return false, nil
}
// FlushMeta writes every dirty meta page to the file, clears the dirty
// flags, and then stores the current occupied count in the header.
//
// If writing a meta page fails the error is returned immediately and the
// dirty flags are left as they were. FlushMeta does not call Sync on the
// file.
func (db *DB) FlushMeta() error {
	dirtyPages := db.metaMap.DirtyMetaPages()
	for i := range dirtyPages {
		err := db.metaMap.WriteMetaPage(db.file, dirtyPages[i])
		if err != nil {
			return err
		}
	}
	db.metaMap.ClearDirty()

	// The header stores the count unsigned; clamp a negative counter to
	// zero before converting.
	count := db.occupied.Load()
	if count < 0 {
		count = 0
	}
	var encoded [8]byte
	binary.LittleEndian.PutUint64(encoded[:], uint64(count))
	_, err := db.file.WriteAt(encoded[:], occupiedOffset)
	if err != nil {
		return fmt.Errorf("bitbox: update occupied count: %w", err)
	}
	return nil
}
// Sync writes dirty meta pages and the occupied count via FlushMeta, then
// asks the file to commit its contents to stable storage. The file sync
// is skipped if FlushMeta fails.
func (db *DB) Sync() error {
	err := db.FlushMeta()
	if err != nil {
		return err
	}
	return db.file.Sync()
}
// --- internal I/O ---
// readDataPage reads the full data page stored at bucket from the file.
// A read error is wrapped with the bucket index.
func (db *DB) readDataPage(bucket uint64) (*core.RawPage, error) {
	var page core.RawPage
	_, err := db.file.ReadAt(page[:], db.offsets.DataPageOffset(bucket))
	if err != nil {
		return nil, fmt.Errorf("bitbox: read data page at bucket %d: %w",
			bucket, err)
	}
	return &page, nil
}
// writeDataPage writes page into the data slot of bucket. A write error
// is wrapped with the bucket index.
func (db *DB) writeDataPage(bucket uint64, page *core.RawPage) error {
	_, err := db.file.WriteAt(page[:], db.offsets.DataPageOffset(bucket))
	if err != nil {
		return fmt.Errorf("bitbox: write data page at bucket %d: %w",
			bucket, err)
	}
	return nil
}

127
nomt/bitbox/htfile.go Normal file
View file

@ -0,0 +1,127 @@
// Package bitbox implements an on-disk open-addressing hash table that maps
// PageIDs to 4096-byte pages. It is the storage backend for the NOMT trie.
package bitbox
import (
"encoding/binary"
"fmt"
"os"
"github.com/ethereum/go-ethereum/nomt/core"
)
// Sizes and header field offsets of the HT file.
const (
	// pageSize is the size of a disk page. Header, meta region and data
	// region are all laid out in multiples of it.
	pageSize = core.PageSize // 4096
	// metaBytesPerPage is the number of meta bytes that fit in one page
	// (one meta byte per bucket).
	metaBytesPerPage = pageSize
	// headerSize is the size of the HT file header in bytes.
	// Layout: [seed 16] [capacity 8] [occupied 8] = 32 bytes, padded to
	// one full page.
	headerSize = pageSize
	// seedOffset is the offset of the 16-byte seed in the header.
	seedOffset = 0
	// capacityOffset is the offset of the 8-byte little-endian capacity
	// in the header.
	capacityOffset = 16
	// occupiedOffset is the offset of the 8-byte little-endian occupied
	// count in the header.
	occupiedOffset = 24
)
// HTOffsets holds the layout parameters from which all file offsets of
// the hash table file are derived.
//
// File layout:
//
//	[header: 1 page] [meta pages: ceil(capacity/4096)] [data pages: capacity * 4096]
//
// The meta region stores one byte per bucket; the data region stores one
// full page per bucket.
type HTOffsets struct {
	// Capacity is the number of buckets in the hash table.
	Capacity uint64
	// MetaPages is ceil(Capacity / 4096), the number of pages occupied by
	// the meta region.
	MetaPages uint64
}
// NewHTOffsets derives the file layout for a table with capacity buckets.
// The meta region is sized in whole pages, rounding up.
func NewHTOffsets(capacity uint64) HTOffsets {
	var layout HTOffsets
	layout.Capacity = capacity
	// Ceiling division: add (divisor - 1) before dividing.
	layout.MetaPages = (capacity + metaBytesPerPage - 1) / metaBytesPerPage
	return layout
}
// MetaByteOffset reports where in the file the meta byte of bucket lives.
// Meta bytes are stored one per bucket, starting right after the header.
func (o *HTOffsets) MetaByteOffset(bucket uint64) int64 {
	const metaRegionStart = int64(headerSize)
	return metaRegionStart + int64(bucket)
}
// DataPageOffset reports where in the file the data page of bucket
// starts. The data region follows the header and the meta pages, with one
// page per bucket.
func (o *HTOffsets) DataPageOffset(bucket uint64) int64 {
	metaRegionBytes := int64(o.MetaPages) * pageSize
	firstDataPage := int64(headerSize) + metaRegionBytes
	return firstDataPage + int64(bucket)*pageSize
}
// TotalFileSize reports the size in bytes of a fully allocated HT file:
// the header, the meta pages, and one data page per bucket.
func (o *HTOffsets) TotalFileSize() int64 {
	metaRegionBytes := int64(o.MetaPages) * pageSize
	dataRegionBytes := int64(o.Capacity) * pageSize
	return int64(headerSize) + metaRegionBytes + dataRegionBytes
}
// CreateHTFile creates a new hash table file at path with the given
// capacity and seed, and returns the open file together with its layout.
//
// The file is pre-allocated to its full size with Truncate, then the
// header (seed, capacity, occupied count of zero) is written at offset 0.
// An existing file at path is truncated by os.Create.
//
// If pre-allocation or the header write fails, the file is closed and
// removed so that no half-initialised table (whose header may still be
// all zeros) is left behind for a later open to pick up.
func CreateHTFile(path string, capacity uint64, seed [16]byte) (
	*os.File, HTOffsets, error,
) {
	offsets := NewHTOffsets(capacity)
	f, err := os.Create(path)
	if err != nil {
		return nil, offsets, fmt.Errorf("bitbox: create HT file: %w", err)
	}

	// abort releases the handle and deletes the partial file.
	abort := func(cause error) (*os.File, HTOffsets, error) {
		f.Close()
		// Best effort: the cause is the error worth reporting, so a
		// failure to remove the file is deliberately ignored.
		_ = os.Remove(path)
		return nil, offsets, cause
	}

	// Pre-allocate.
	if err := f.Truncate(offsets.TotalFileSize()); err != nil {
		return abort(fmt.Errorf("bitbox: truncate HT file: %w", err))
	}

	// Write header.
	var header [headerSize]byte
	copy(header[seedOffset:], seed[:])
	binary.LittleEndian.PutUint64(header[capacityOffset:], capacity)
	binary.LittleEndian.PutUint64(header[occupiedOffset:], 0)
	if _, err := f.WriteAt(header[:], 0); err != nil {
		return abort(fmt.Errorf("bitbox: write header: %w", err))
	}
	return f, offsets, nil
}
// OpenHTFile opens the hash table file at path for reading and writing
// and decodes its header.
//
// It returns the open file, the layout derived from the stored capacity,
// the stored seed, and the stored occupied count. The header values are
// returned as read; they are not validated here. If the header cannot be
// read the file is closed before the error is returned.
func OpenHTFile(path string) (
	*os.File, HTOffsets, [16]byte, uint64, error,
) {
	var (
		noLayout HTOffsets
		noSeed   [16]byte
	)

	f, err := os.OpenFile(path, os.O_RDWR, 0)
	if err != nil {
		return nil, noLayout, noSeed, 0,
			fmt.Errorf("bitbox: open HT file: %w", err)
	}

	header := make([]byte, headerSize)
	if _, err := f.ReadAt(header, 0); err != nil {
		f.Close()
		return nil, noLayout, noSeed, 0,
			fmt.Errorf("bitbox: read header: %w", err)
	}

	var seed [16]byte
	copy(seed[:], header[seedOffset:seedOffset+16])
	capacity := binary.LittleEndian.Uint64(header[capacityOffset:])
	occupied := binary.LittleEndian.Uint64(header[occupiedOffset:])

	return f, NewHTOffsets(capacity), seed, occupied, nil
}

118
nomt/bitbox/metamap.go Normal file
View file

@ -0,0 +1,118 @@
package bitbox
import (
"fmt"
"os"
)
// Meta byte constants.
//
// Both reserved values have bit 7 clear, so they can never collide with
// an occupied meta byte, which always has bit 7 set (see MakeOccupied).
const (
	// MetaEmpty marks an empty bucket. Probing stops at an empty bucket.
	MetaEmpty byte = 0x00
	// MetaTombstone marks a deleted bucket (still probed through).
	MetaTombstone byte = 0x7F
)
// IsOccupied reports whether b marks a bucket that holds a page.
// Occupied meta bytes are exactly those with bit 7 set, i.e. the values
// 0x80 through 0xFF.
func IsOccupied(b byte) bool {
	return b >= 0x80
}
// IsEmpty reports whether b is the MetaEmpty marker, i.e. the bucket has
// never held a page or has been reset.
func IsEmpty(b byte) bool {
	empty := b == MetaEmpty
	return empty
}
// IsTombstone reports whether b is the MetaTombstone marker, i.e. the
// bucket held a page that has since been deleted.
func IsTombstone(b byte) bool {
	deleted := b == MetaTombstone
	return deleted
}
// MakeOccupied builds the meta byte for an occupied bucket from a hash.
// The low 7 bits carry the top 7 bits of the hash as a tag; bit 7 is
// always set to mark the bucket occupied.
func MakeOccupied(hash uint64) byte {
	tag := byte(hash >> 57) // top 7 bits, so tag <= 0x7F
	return tag | 0x80
}
// TagMatches reports whether metaByte is an occupied meta byte whose tag
// equals the tag derived from hash. A match is only a hint: different
// hashes can share a tag, so callers still compare the stored PageID.
func TagMatches(metaByte byte, hash uint64) bool {
	if !IsOccupied(metaByte) {
		return false
	}
	return metaByte == MakeOccupied(hash)
}
// MetaMap holds an in-memory copy of all meta bytes for the hash table,
// together with per-page dirty flags so that only modified meta pages
// need to be written back to the file.
type MetaMap struct {
	// data holds one meta byte per bucket, indexed by bucket number.
	data []byte
	dirty []bool // per meta-page dirty tracking
}
// NewMetaMap returns a MetaMap for capacity buckets in which every bucket
// is empty and no meta page is dirty.
func NewMetaMap(capacity uint64) *MetaMap {
	// One dirty flag per meta page, rounding the page count up.
	pageCount := (capacity + metaBytesPerPage - 1) / metaBytesPerPage

	mm := new(MetaMap)
	mm.data = make([]byte, capacity)
	mm.dirty = make([]bool, pageCount)
	return mm
}
// LoadMetaMap builds a MetaMap from the meta region of an HT file.
//
// The whole meta region (offsets.MetaPages pages, starting right after
// the header) is read in a single call; only the first offsets.Capacity
// bytes are kept, the remainder being padding. No page is marked dirty.
func LoadMetaMap(f *os.File, offsets HTOffsets) (*MetaMap, error) {
	mm := NewMetaMap(offsets.Capacity)

	region := make([]byte, int64(offsets.MetaPages)*pageSize)
	_, err := f.ReadAt(region, int64(headerSize))
	if err != nil {
		return nil, fmt.Errorf("bitbox: load meta map: %w", err)
	}

	// Drop the padding at the end of the last meta page.
	copy(mm.data, region[:offsets.Capacity])
	return mm, nil
}
// Get reports the meta byte currently recorded for bucket. The bucket
// index is not range-checked beyond the slice bounds check.
func (m *MetaMap) Get(bucket uint64) byte {
	meta := m.data[bucket]
	return meta
}
// Set records value as the meta byte of bucket and flags the meta page
// that contains the bucket as dirty, so it is picked up by the next
// DirtyMetaPages call.
func (m *MetaMap) Set(bucket uint64, value byte) {
	m.data[bucket] = value
	pageIdx := bucket / metaBytesPerPage
	m.dirty[pageIdx] = true
}
// DirtyMetaPages lists, in ascending order, the indices of the meta pages
// modified since the last ClearDirty. The result is a fresh, non-nil
// slice; it is empty when nothing is dirty.
func (m *MetaMap) DirtyMetaPages() []uint64 {
	dirtyPages := make([]uint64, 0, len(m.dirty))
	for idx := 0; idx < len(m.dirty); idx++ {
		if !m.dirty[idx] {
			continue
		}
		dirtyPages = append(dirtyPages, uint64(idx))
	}
	return dirtyPages
}
// ClearDirty marks every meta page as clean. The meta bytes themselves
// are not touched.
func (m *MetaMap) ClearDirty() {
	// clear on a slice zeroes every element, i.e. sets each flag to false.
	clear(m.dirty)
}
// WriteMetaPage writes the meta page with index pageIdx to f.
//
// A full page is always written: the meta bytes belonging to the page are
// copied into a zeroed page-sized buffer, so any tail beyond the table's
// capacity is written as zero padding. The page lands at the header size
// plus pageIdx pages.
//
// An error is returned if pageIdx does not name a meta page of this map
// or if the write fails. The dirty flag of the page is not modified.
func (m *MetaMap) WriteMetaPage(
	f *os.File, pageIdx uint64,
) error {
	// Reject indices past the last meta page up front; slicing m.data
	// with them would otherwise panic.
	if pageCount := uint64(len(m.dirty)); pageIdx >= pageCount {
		return fmt.Errorf(
			"bitbox: write meta page %d: index out of range (meta pages: %d)",
			pageIdx, pageCount)
	}

	var buf [pageSize]byte
	start := pageIdx * metaBytesPerPage
	// The last page may be only partially backed by meta bytes.
	end := min(start+metaBytesPerPage, uint64(len(m.data)))
	copy(buf[:], m.data[start:end])

	offset := int64(headerSize) + int64(pageIdx)*pageSize
	if _, err := f.WriteAt(buf[:], offset); err != nil {
		return fmt.Errorf("bitbox: write meta page %d: %w", pageIdx, err)
	}
	return nil
}

81
nomt/bitbox/probe.go Normal file
View file

@ -0,0 +1,81 @@
package bitbox
import (
"encoding/binary"
"github.com/cespare/xxhash/v2"
"github.com/ethereum/go-ethereum/nomt/core"
)
// HashPageID hashes a PageID under seed: it returns the xxhash64 of a
// 48-byte buffer that starts with the 16 seed bytes, followed by the
// encoded PageID.
func HashPageID(seed [16]byte, pageID core.PageID) uint64 {
	var input [48]byte
	seedLen := copy(input[:], seed[:])
	encoded := pageID.Encode()
	copy(input[seedLen:], encoded[:])
	return xxhash.Sum64(input[:])
}
// HashPageIDBytes hashes an already-encoded PageID under seed: it returns
// the xxhash64 of the 48 bytes seed||encodedPageID.
func HashPageIDBytes(seed [16]byte, encodedPageID [32]byte) uint64 {
	input := make([]byte, 0, len(seed)+len(encodedPageID))
	input = append(input, seed[:]...)
	input = append(input, encodedPageID[:]...)
	return xxhash.Sum64(input)
}
// HashSeedFromBytes turns a byte slice into a 16-byte seed. At most the
// first 16 bytes of b are used; if b is shorter, the remaining seed bytes
// are zero.
func HashSeedFromBytes(b []byte) (seed [16]byte) {
	copy(seed[:], b)
	return seed
}
// HashSeedFromUint64 builds a 16-byte seed from two integers: a fills
// bytes 0-7 and b fills bytes 8-15, each in little-endian order.
func HashSeedFromUint64(a, b uint64) [16]byte {
	var seed [16]byte
	for i, word := range [2]uint64{a, b} {
		binary.LittleEndian.PutUint64(seed[8*i:8*i+8], word)
	}
	return seed
}
// ProbeSequence walks the buckets of the hash table in triangular order.
//
// After k calls to Next the current bucket is
//
//	(initial + k*(k+1)/2) mod capacity
//
// because the k-th call advances the previous bucket by k. With a
// power-of-2 capacity the first `capacity` positions are all distinct, so
// a loop bounded by the capacity examines every bucket.
type ProbeSequence struct {
	hash     uint64 // hash the sequence was created from
	bucket   uint64 // current bucket index
	step     uint64 // number of Next calls so far
	capacity uint64 // number of buckets in the table
}

// NewProbeSequence starts a probe sequence at bucket hash mod capacity.
// The capacity MUST be a power of 2; a zero capacity panics on the
// modulo.
func NewProbeSequence(hash, capacity uint64) ProbeSequence {
	var seq ProbeSequence
	seq.hash = hash
	seq.capacity = capacity
	seq.bucket = hash % capacity
	return seq
}

// Bucket reports the bucket index the sequence currently points at.
func (p *ProbeSequence) Bucket() uint64 {
	current := p.bucket
	return current
}

// Hash reports the hash this sequence was created from.
func (p *ProbeSequence) Hash() uint64 {
	seedHash := p.hash
	return seedHash
}

// Next moves to the following bucket: the step counter grows by one and
// the bucket advances by the new step, wrapping at the capacity.
func (p *ProbeSequence) Next() {
	p.step += 1
	advanced := p.bucket + p.step
	p.bucket = advanced % p.capacity
}