mirror of
https://github.com/ethereum/go-ethereum.git
synced 2026-05-24 08:49:29 +00:00
trie/bintrie: postpend bit-length to disambiguate path encoding
The compact LSB-aligned encoding via ActiveBytes packed paths into ceil(len/8) bytes without recording the bit-length. Two distinct paths whose bit-lengths fell in the same byte-bucket and whose integer values matched produced identical bytes — e.g. the 1-bit path "1" and the 8-bit path "00000001" both encoded to [0x01], so two stems sitting at depths 1 and 8 on different branches could clobber each other in nodeset.AddNode. Replace ActiveBytes/PutActiveBytes with KeyBytes/PutKeyBytes, which append a uint8 bit-length byte after the active bytes. The length byte is postpended (rather than prepended) so that nodes along a single root-to-leaf descent share leading path bytes, improving LSM block locality during traversal. The empty path is encoded as no bytes (not [0x00]): byteCount=0 is unique to len=0 so no disambiguation byte is needed. This keeps the root's DB key empty, matching the resolver's existing nil-path convention.
This commit is contained in:
parent
d464b9e485
commit
012bec0eb1
5 changed files with 42 additions and 21 deletions
|
|
@ -244,29 +244,29 @@ func TestKeyToPath(t *testing.T) {
|
|||
{
|
||||
name: "depth 0",
|
||||
depth: 0,
|
||||
key: []byte{0x80}, // 10000000 in binary
|
||||
expected: []byte{1}, // 1 bit packed: MSB=1 → 0x01
|
||||
key: []byte{0x80}, // 10000000 in binary
|
||||
expected: []byte{0x01, 1}, // 1-bit value 0x01 + length byte 1
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "depth 7",
|
||||
depth: 7,
|
||||
key: []byte{0xFF}, // 11111111 in binary
|
||||
expected: []byte{0xFF}, // 8 bits packed into 1 byte
|
||||
key: []byte{0xFF}, // 11111111 in binary
|
||||
expected: []byte{0xFF, 8}, // 8-bit value 0xFF + length byte 8
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "depth crossing byte boundary",
|
||||
depth: 10,
|
||||
key: []byte{0xFF, 0x00}, // 11111111 00000000 in binary
|
||||
expected: []byte{0x07, 0xF8}, // 11 bits = 11111111000 → 0x07F8
|
||||
key: []byte{0xFF, 0x00}, // 11111111 00000000 in binary
|
||||
expected: []byte{0x07, 0xF8, 11}, // 11-bit value 0x07F8 + length byte 11
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "max valid depth",
|
||||
depth: StemSize*8 - 1,
|
||||
key: make([]byte, HashSize),
|
||||
expected: make([]byte, StemSize), // 248 bits of zeros → 31 packed bytes
|
||||
expected: append(make([]byte, StemSize), StemSize*8), // 248 bits of zeros + length byte 248
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
|
|
|
|||
|
|
@ -294,26 +294,47 @@ func (b *BitArray) Set(x *BitArray) *BitArray {
|
|||
return b
|
||||
}
|
||||
|
||||
// ActiveBytes returns a slice containing only the bytes that are actually used by the bit array,
|
||||
// as specified by the length. The returned slice is in big-endian order.
|
||||
// KeyBytes returns the path-to-DB-key encoding: the active bytes in big-endian
|
||||
// order followed by a single trailing byte holding the bit-length. The trailing
|
||||
// length disambiguates paths whose active bytes coincide (e.g. 1-bit "1" and
|
||||
// 8-bit "00000001" both pack to integer value 1, but their key encodings are
|
||||
// [0x01, 0x01] and [0x01, 0x08] respectively).
|
||||
//
|
||||
// The empty path is encoded as no bytes: byteCount=0 is unique to len=0, so
|
||||
// no disambiguation byte is needed.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// len = 10, words = [0x3FF, 0, 0, 0] -> [0x03, 0xFF]
|
||||
func (b *BitArray) ActiveBytes() []byte {
|
||||
// len = 10, words = [0x3FF, 0, 0, 0] -> [0x03, 0xFF, 0x0A]
|
||||
func (b *BitArray) KeyBytes() []byte {
|
||||
if b.len == 0 {
|
||||
return nil
|
||||
}
|
||||
bc := b.byteCount()
|
||||
res := make([]byte, bc+1)
|
||||
wordsBytes := b.Bytes()
|
||||
return wordsBytes[32-b.byteCount():]
|
||||
copy(res[:bc], wordsBytes[32-bc:])
|
||||
res[bc] = b.len
|
||||
return res
|
||||
}
|
||||
|
||||
// PutActiveBytes writes the active bytes into dst (which must be at least 32 bytes)
|
||||
// and returns the populated sub-slice. No heap allocation occurs because the
|
||||
// backing array is owned by the caller.
|
||||
func (b *BitArray) PutActiveBytes(dst *[32]byte) []byte {
|
||||
// PutKeyBytes writes the key encoding (active bytes followed by length byte)
|
||||
// into dst and returns the populated sub-slice. The empty path returns dst[:0]
|
||||
// without touching dst. For non-empty paths dst must have len >= 33 (32 bytes
|
||||
// for the four packed words + 1 length byte; a 248-bit path uses 31 + 1).
|
||||
func (b *BitArray) PutKeyBytes(dst []byte) []byte {
|
||||
if b.len == 0 {
|
||||
return dst[:0]
|
||||
}
|
||||
_ = dst[32] // bounds check hint
|
||||
binary.BigEndian.PutUint64(dst[0:8], b.words[3])
|
||||
binary.BigEndian.PutUint64(dst[8:16], b.words[2])
|
||||
binary.BigEndian.PutUint64(dst[16:24], b.words[1])
|
||||
binary.BigEndian.PutUint64(dst[24:32], b.words[0])
|
||||
return dst[32-b.byteCount():]
|
||||
bc := b.byteCount()
|
||||
copy(dst, dst[32-bc:32])
|
||||
dst[bc] = b.len
|
||||
return dst[:bc+1]
|
||||
}
|
||||
|
||||
// bitFromLSB returns the bit value at position n, where n = 0 is LSB.
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ func keyToPath(depth int, key []byte) ([]byte, error) {
|
|||
keyLen := min(len(key), 31)
|
||||
ba := new(BitArray).SetBytes(uint8(keyLen*8), key[:keyLen])
|
||||
path := new(BitArray).MSBs(ba, uint8(depth+1))
|
||||
return path.ActiveBytes(), nil
|
||||
return path.KeyBytes(), nil
|
||||
}
|
||||
|
||||
// Invariant: dirty=false implies mustRecompute=false. Every mutation that
|
||||
|
|
|
|||
|
|
@ -201,7 +201,7 @@ func (it *binaryNodeIterator) Path() []byte {
|
|||
}
|
||||
path.AppendBit(&path, uint8(state.Index))
|
||||
}
|
||||
return path.ActiveBytes()
|
||||
return path.KeyBytes()
|
||||
}
|
||||
|
||||
func (it *binaryNodeIterator) NodeBlob() []byte {
|
||||
|
|
|
|||
|
|
@ -321,8 +321,8 @@ func (t *BinaryTrie) Commit(_ bool) (common.Hash, *trienode.NodeSet) {
|
|||
|
||||
var rootPath BitArray
|
||||
t.store.collectNodes(t.store.root, rootPath, func(path BitArray, hash common.Hash, serialized []byte) {
|
||||
var buf [32]byte
|
||||
pathBytes := path.PutActiveBytes(&buf)
|
||||
var buf [33]byte
|
||||
pathBytes := path.PutKeyBytes(buf[:])
|
||||
nodeset.AddNode(pathBytes, trienode.NewNodeWithPrev(hash, serialized, t.tracer.Get(pathBytes)))
|
||||
}, t.groupDepth)
|
||||
return t.Hash(), nodeset
|
||||
|
|
|
|||
Loading…
Reference in a new issue