trie/bintrie: postpend bit-length to disambiguate path encoding

The compact LSB-aligned encoding via ActiveBytes packed paths into
ceil(len/8) bytes without recording the bit-length. Two distinct paths
whose bit-lengths fell in the same byte-bucket and whose integer values
matched produced identical bytes — e.g. the 1-bit path "1" and the 8-bit
path "00000001" both encoded to [0x01], so two stems sitting at depths 1
and 8 on different branches could clobber each other in nodeset.AddNode.

Replace ActiveBytes/PutActiveBytes with KeyBytes/PutKeyBytes, which
append a uint8 bit-length byte after the active bytes. The length is
appended rather than prepended so that nodes along a single root-to-leaf
descent share leading path bytes, improving LSM block locality during
traversal.

The empty path is encoded as no bytes (not [0x00]): byteCount=0 is
unique to len=0 so no disambiguation byte is needed. This keeps the
root's DB key empty, matching the resolver's existing nil-path convention.
This commit is contained in:
weiihann 2026-05-08 19:36:16 +08:00
parent d464b9e485
commit 012bec0eb1
5 changed files with 42 additions and 21 deletions

View file

@ -244,29 +244,29 @@ func TestKeyToPath(t *testing.T) {
{
name: "depth 0",
depth: 0,
key: []byte{0x80}, // 10000000 in binary
expected: []byte{1}, // 1 bit packed: MSB=1 → 0x01
key: []byte{0x80}, // 10000000 in binary
expected: []byte{0x01, 1}, // 1-bit value 0x01 + length byte 1
wantErr: false,
},
{
name: "depth 7",
depth: 7,
key: []byte{0xFF}, // 11111111 in binary
expected: []byte{0xFF}, // 8 bits packed into 1 byte
key: []byte{0xFF}, // 11111111 in binary
expected: []byte{0xFF, 8}, // 8-bit value 0xFF + length byte 8
wantErr: false,
},
{
name: "depth crossing byte boundary",
depth: 10,
key: []byte{0xFF, 0x00}, // 11111111 00000000 in binary
expected: []byte{0x07, 0xF8}, // 11 bits = 11111111000 → 0x07F8
key: []byte{0xFF, 0x00}, // 11111111 00000000 in binary
expected: []byte{0x07, 0xF8, 11}, // 11-bit value 0x07F8 + length byte 11
wantErr: false,
},
{
name: "max valid depth",
depth: StemSize*8 - 1,
key: make([]byte, HashSize),
expected: make([]byte, StemSize), // 248 bits of zeros → 31 packed bytes
expected: append(make([]byte, StemSize), StemSize*8), // 248 bits of zeros + length byte 248
wantErr: false,
},
{

View file

@ -294,26 +294,47 @@ func (b *BitArray) Set(x *BitArray) *BitArray {
return b
}
// ActiveBytes returns a slice containing only the bytes that are actually used by the bit array,
// as specified by the length. The returned slice is in big-endian order.
// KeyBytes returns the path-to-DB-key encoding: the active bytes in big-endian
// order followed by a single trailing byte holding the bit-length. The trailing
// length disambiguates paths whose active bytes coincide (e.g. 1-bit "1" and
// 8-bit "00000001" both pack to integer value 1, but their key encodings are
// [0x01, 0x01] and [0x01, 0x08] respectively).
//
// The empty path is encoded as no bytes: byteCount=0 is unique to len=0, so
// no disambiguation byte is needed.
//
// Example:
//
// len = 10, words = [0x3FF, 0, 0, 0] -> [0x03, 0xFF]
func (b *BitArray) ActiveBytes() []byte {
// len = 10, words = [0x3FF, 0, 0, 0] -> [0x03, 0xFF, 0x0A]
func (b *BitArray) KeyBytes() []byte {
if b.len == 0 {
return nil
}
bc := b.byteCount()
res := make([]byte, bc+1)
wordsBytes := b.Bytes()
return wordsBytes[32-b.byteCount():]
copy(res[:bc], wordsBytes[32-bc:])
res[bc] = b.len
return res
}
// PutActiveBytes writes the active bytes into dst (which must be at least 32 bytes)
// and returns the populated sub-slice. No heap allocation occurs because the
// backing array is owned by the caller.
func (b *BitArray) PutActiveBytes(dst *[32]byte) []byte {
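// PutKeyBytes writes the key encoding (active bytes followed by length byte)
// into dst and returns the populated sub-slice. The empty path returns dst[:0]
// without touching dst. For non-empty paths dst must have len >= 33: the first
// 32 bytes are used as scratch for the four big-endian words before the active
// bytes are moved to the front, and index 32 is bounds-checked up front. Bytes
// of dst beyond the returned sub-slice are overwritten and must not be relied
// on by the caller.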
func (b *BitArray) PutKeyBytes(dst []byte) []byte {
if b.len == 0 {
return dst[:0]
}
_ = dst[32] // bounds check hint
binary.BigEndian.PutUint64(dst[0:8], b.words[3])
binary.BigEndian.PutUint64(dst[8:16], b.words[2])
binary.BigEndian.PutUint64(dst[16:24], b.words[1])
binary.BigEndian.PutUint64(dst[24:32], b.words[0])
return dst[32-b.byteCount():]
bc := b.byteCount()
copy(dst, dst[32-bc:32])
dst[bc] = b.len
return dst[:bc+1]
}
// bitFromLSB returns the bit value at position n, where n = 0 is LSB.

View file

@ -29,7 +29,7 @@ func keyToPath(depth int, key []byte) ([]byte, error) {
keyLen := min(len(key), 31)
ba := new(BitArray).SetBytes(uint8(keyLen*8), key[:keyLen])
path := new(BitArray).MSBs(ba, uint8(depth+1))
return path.ActiveBytes(), nil
return path.KeyBytes(), nil
}
// Invariant: dirty=false implies mustRecompute=false. Every mutation that

View file

@ -201,7 +201,7 @@ func (it *binaryNodeIterator) Path() []byte {
}
path.AppendBit(&path, uint8(state.Index))
}
return path.ActiveBytes()
return path.KeyBytes()
}
func (it *binaryNodeIterator) NodeBlob() []byte {

View file

@ -321,8 +321,8 @@ func (t *BinaryTrie) Commit(_ bool) (common.Hash, *trienode.NodeSet) {
var rootPath BitArray
t.store.collectNodes(t.store.root, rootPath, func(path BitArray, hash common.Hash, serialized []byte) {
var buf [32]byte
pathBytes := path.PutActiveBytes(&buf)
var buf [33]byte
pathBytes := path.PutKeyBytes(buf[:])
nodeset.AddNode(pathBytes, trienode.NewNodeWithPrev(hash, serialized, t.tracer.Get(pathBytes)))
}, t.groupDepth)
return t.Hash(), nodeset