go-ethereum/nomt/bitbox/metamap.go
weiihann fef1ed4c4f nomt/bitbox: add Phase 3 Bitbox on-disk hash table storage
Implement the on-disk open-addressing hash table for storing trie pages:
- htfile.go: HT file layout with header, meta pages, and data pages
- metamap.go: in-memory meta byte map with dirty page tracking
- probe.go: triangular probing with xxhash64 page ID hashing
- db.go: Bitbox DB with StorePage, LoadPage, DeletePage, FlushMeta, Sync

The hash table uses 1-byte meta tags (top 7 bits of hash) for fast
filtering before reading full 4096-byte data pages. Triangular probing
with power-of-2 capacity guarantees all buckets are visited.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-12 17:14:20 +08:00

118 lines
3.2 KiB
Go

package bitbox
import (
"fmt"
"os"
)
// Reserved meta byte values. Both have bit 7 clear, which distinguishes them
// from occupied meta bytes (those always have bit 7 set).
const (
	// MetaEmpty is the meta byte of an empty bucket.
	MetaEmpty = byte(0x00)
	// MetaTombstone is the meta byte of a deleted bucket (still probed
	// through).
	MetaTombstone = byte(0x7F)
)
// IsOccupied reports whether b is the meta byte of an occupied bucket.
// A bucket is occupied exactly when bit 7 of its meta byte is set, i.e. when
// the byte is in the range 0x80..0xFF.
func IsOccupied(b byte) bool {
	return b >= 0x80
}
// IsEmpty reports whether b equals MetaEmpty, the meta byte of a bucket that
// is empty.
func IsEmpty(b byte) bool { return b == MetaEmpty }
// IsTombstone reports whether b equals MetaTombstone, the meta byte of a
// bucket whose entry was deleted.
func IsTombstone(b byte) bool { return b == MetaTombstone }
// MakeOccupied builds the meta byte of an occupied bucket from a hash value.
// The low 7 bits of the result are the top 7 bits of hash (bits 57..63) and
// bit 7 is always set, so the result is in the range 0x80..0xFF.
func MakeOccupied(hash uint64) byte {
	tag := byte(hash >> 57) // only 7 bits remain after the shift: 0x00..0x7F
	return tag | 0x80
}
// TagMatches reports whether metaByte is an occupied meta byte whose tag
// equals the one MakeOccupied derives from hash. A true result means the
// bucket could belong to hash; it is not proof, because the tag is derived
// from only part of the hash.
func TagMatches(metaByte byte, hash uint64) bool {
	// Empty and tombstone bytes never match any hash.
	if !IsOccupied(metaByte) {
		return false
	}
	return MakeOccupied(hash) == metaByte
}
// MetaMap is the in-memory copy of the hash table's meta bytes, together with
// a dirty flag for each meta page.
type MetaMap struct {
	// data holds one meta byte per bucket, indexed by bucket number.
	data []byte
	// dirty holds one flag per meta page; a set flag marks a page that
	// contains a modified meta byte.
	dirty []bool
}
// NewMetaMap returns a MetaMap with capacity buckets. Every meta byte starts
// at zero (MetaEmpty) and every dirty flag starts clear.
func NewMetaMap(capacity uint64) *MetaMap {
	// Round up so a partially filled final meta page still gets a dirty flag.
	pageCount := (capacity + metaBytesPerPage - 1) / metaBytesPerPage

	mm := new(MetaMap)
	mm.data = make([]byte, capacity)
	mm.dirty = make([]bool, pageCount)
	return mm
}
// LoadMetaMap reads the meta region of the HT file into a new MetaMap.
//
// The region starts at byte offset headerSize and spans offsets.MetaPages
// pages of pageSize bytes. Its first offsets.Capacity bytes are copied into
// the map; anything after them is treated as padding. All dirty flags of the
// returned map are clear.
//
// It returns an error if offsets describes a meta region too small to hold
// offsets.Capacity meta bytes, or if the region cannot be read in full.
func LoadMetaMap(f *os.File, offsets HTOffsets) (*MetaMap, error) {
	metaRegionSize := int64(offsets.MetaPages) * pageSize

	// Validate before allocating or slicing: with inconsistent offsets the
	// buf[:offsets.Capacity] expression below would panic instead of
	// reporting an error.
	if metaRegionSize < 0 || offsets.Capacity > uint64(metaRegionSize) {
		return nil, fmt.Errorf(
			"bitbox: load meta map: capacity %d exceeds meta region of %d pages",
			offsets.Capacity, offsets.MetaPages,
		)
	}

	mm := NewMetaMap(offsets.Capacity)

	// Read the whole meta region with a single call.
	buf := make([]byte, metaRegionSize)
	if _, err := f.ReadAt(buf, int64(headerSize)); err != nil {
		return nil, fmt.Errorf("bitbox: load meta map: %w", err)
	}

	// Keep only the capacity-many meta bytes (the rest is padding).
	copy(mm.data, buf[:offsets.Capacity])
	return mm, nil
}
// Get returns the meta byte stored for bucket. It panics if bucket is not a
// valid index into the map.
func (m *MetaMap) Get(bucket uint64) byte { return m.data[bucket] }
// Set stores value as the meta byte of bucket and raises the dirty flag of
// the meta page that contains that bucket. It panics if bucket is not a valid
// index into the map.
func (m *MetaMap) Set(bucket uint64, value byte) {
	pageIdx := bucket / metaBytesPerPage
	m.data[bucket] = value
	m.dirty[pageIdx] = true
}
// DirtyMetaPages returns, in ascending order, the indices of all meta pages
// whose dirty flag is set, i.e. pages modified since the last call to
// ClearDirty. The result is non-nil even when no page is dirty.
func (m *MetaMap) DirtyMetaPages() []uint64 {
	dirtyIdx := make([]uint64, 0, len(m.dirty))
	for idx := 0; idx < len(m.dirty); idx++ {
		if !m.dirty[idx] {
			continue
		}
		dirtyIdx = append(dirtyIdx, uint64(idx))
	}
	return dirtyIdx
}
// ClearDirty clears the dirty flag of every meta page. The meta bytes
// themselves are left untouched.
func (m *MetaMap) ClearDirty() {
	// The builtin clear sets every element of the slice to false.
	clear(m.dirty)
}
// WriteMetaPage writes the meta page with index pageIdx to f.
//
// The page is written as exactly pageSize bytes at byte offset
// headerSize + pageIdx*pageSize. It carries the meta bytes of the buckets
// belonging to that page; if the map ends partway through the page, the
// remainder is zero-filled. Dirty flags are not modified.
//
// It returns an error if pageIdx is not the index of a meta page tracked by
// this map, or if the write fails.
func (m *MetaMap) WriteMetaPage(
	f *os.File, pageIdx uint64,
) error {
	// Reject indices beyond the last tracked page. Without this check a too
	// large index panics on the slice expression below, and an index exactly
	// one past the end can write an all-zero page at the offset just after
	// the last page this map tracks.
	if pageIdx >= uint64(len(m.dirty)) {
		return fmt.Errorf(
			"bitbox: write meta page %d: index out of range [0, %d)",
			pageIdx, len(m.dirty),
		)
	}

	var buf [pageSize]byte
	start := pageIdx * metaBytesPerPage
	// The final page may be only partially backed by m.data.
	end := min(start+metaBytesPerPage, uint64(len(m.data)))
	copy(buf[:], m.data[start:end])

	offset := int64(headerSize) + int64(pageIdx)*pageSize
	if _, err := f.WriteAt(buf[:], offset); err != nil {
		return fmt.Errorf("bitbox: write meta page %d: %w", pageIdx, err)
	}
	return nil
}