From 012bec0eb1fdf398a6fd7ab49be09ab935f7d9ce Mon Sep 17 00:00:00 2001 From: weiihann Date: Fri, 8 May 2026 19:36:16 +0800 Subject: [PATCH] trie/bintrie: postpend bit-length to disambiguate path encoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The compact LSB-aligned encoding via ActiveBytes packed paths into ceil(len/8) bytes without recording the bit-length. Two distinct paths whose bit-lengths fell in the same byte-bucket and whose integer values matched produced identical bytes — e.g. the 1-bit path "1" and the 8-bit path "00000001" both encoded to [0x01], so two stems sitting at depths 1 and 8 on different branches could clobber each other in nodeset.AddNode. Replace ActiveBytes/PutActiveBytes with KeyBytes/PutKeyBytes, which append a uint8 bit-length byte after the active bytes. Postpend (rather than prepend) so nodes along a single root-to-leaf descent share leading path bytes, improving LSM block locality during traversal. The empty path is encoded as no bytes (not [0x00]): byteCount=0 is unique to len=0 so no disambiguation byte is needed. This keeps the root's DB key empty, matching the resolver's existing nil-path convention. 
--- trie/bintrie/binary_node_test.go | 14 +++++------ trie/bintrie/bitarray.go | 41 ++++++++++++++++++++++++-------- trie/bintrie/internal_node.go | 2 +- trie/bintrie/iterator.go | 2 +- trie/bintrie/trie.go | 4 ++-- 5 files changed, 42 insertions(+), 21 deletions(-) diff --git a/trie/bintrie/binary_node_test.go b/trie/bintrie/binary_node_test.go index ff3b8225bb..e30c875e9d 100644 --- a/trie/bintrie/binary_node_test.go +++ b/trie/bintrie/binary_node_test.go @@ -244,29 +244,29 @@ func TestKeyToPath(t *testing.T) { { name: "depth 0", depth: 0, - key: []byte{0x80}, // 10000000 in binary - expected: []byte{1}, // 1 bit packed: MSB=1 → 0x01 + key: []byte{0x80}, // 10000000 in binary + expected: []byte{0x01, 1}, // 1-bit value 0x01 + length byte 1 wantErr: false, }, { name: "depth 7", depth: 7, - key: []byte{0xFF}, // 11111111 in binary - expected: []byte{0xFF}, // 8 bits packed into 1 byte + key: []byte{0xFF}, // 11111111 in binary + expected: []byte{0xFF, 8}, // 8-bit value 0xFF + length byte 8 wantErr: false, }, { name: "depth crossing byte boundary", depth: 10, - key: []byte{0xFF, 0x00}, // 11111111 00000000 in binary - expected: []byte{0x07, 0xF8}, // 11 bits = 11111111000 → 0x07F8 + key: []byte{0xFF, 0x00}, // 11111111 00000000 in binary + expected: []byte{0x07, 0xF8, 11}, // 11-bit value 0x07F8 + length byte 11 wantErr: false, }, { name: "max valid depth", depth: StemSize*8 - 1, key: make([]byte, HashSize), - expected: make([]byte, StemSize), // 248 bits of zeros → 31 packed bytes + expected: append(make([]byte, StemSize), StemSize*8), // 248 bits of zeros + length byte 248 wantErr: false, }, { diff --git a/trie/bintrie/bitarray.go b/trie/bintrie/bitarray.go index 5b39a629d3..b202be94e4 100644 --- a/trie/bintrie/bitarray.go +++ b/trie/bintrie/bitarray.go @@ -294,26 +294,47 @@ func (b *BitArray) Set(x *BitArray) *BitArray { return b } -// ActiveBytes returns a slice containing only the bytes that are actually used by the bit array, -// as specified by the length. 
The returned slice is in big-endian order. +// KeyBytes returns the path-to-DB-key encoding: the active bytes in big-endian +// order followed by a single trailing byte holding the bit-length. The trailing +// length disambiguates paths whose active bytes coincide (e.g. 1-bit "1" and +// 8-bit "00000001" both pack to integer value 1, but their key encodings are +// [0x01, 0x01] and [0x01, 0x08] respectively). +// +// The empty path is encoded as no bytes: byteCount=0 is unique to len=0, so +// no disambiguation byte is needed. // // Example: // -// len = 10, words = [0x3FF, 0, 0, 0] -> [0x03, 0xFF] -func (b *BitArray) ActiveBytes() []byte { +// len = 10, words = [0x3FF, 0, 0, 0] -> [0x03, 0xFF, 0x0A] +func (b *BitArray) KeyBytes() []byte { + if b.len == 0 { + return nil + } + bc := b.byteCount() + res := make([]byte, bc+1) wordsBytes := b.Bytes() - return wordsBytes[32-b.byteCount():] + copy(res[:bc], wordsBytes[32-bc:]) + res[bc] = b.len + return res } -// PutActiveBytes writes the active bytes into dst (which must be at least 32 bytes) -// and returns the populated sub-slice. No heap allocation occurs because the -// backing array is owned by the caller. -func (b *BitArray) PutActiveBytes(dst *[32]byte) []byte { +// PutKeyBytes writes the key encoding (active bytes followed by length byte) +// into dst and returns the populated sub-slice. The empty path returns dst[:0] +// without touching dst. For non-empty paths dst must have len >= 33 (32 scratch +// bytes for the four 64-bit words + 1 length byte). 
+func (b *BitArray) PutKeyBytes(dst []byte) []byte { + if b.len == 0 { + return dst[:0] + } + _ = dst[32] // bounds check hint binary.BigEndian.PutUint64(dst[0:8], b.words[3]) binary.BigEndian.PutUint64(dst[8:16], b.words[2]) binary.BigEndian.PutUint64(dst[16:24], b.words[1]) binary.BigEndian.PutUint64(dst[24:32], b.words[0]) - return dst[32-b.byteCount():] + bc := b.byteCount() + copy(dst, dst[32-bc:32]) + dst[bc] = b.len + return dst[:bc+1] } // bitFromLSB returns the bit value at position n, where n = 0 is LSB. diff --git a/trie/bintrie/internal_node.go b/trie/bintrie/internal_node.go index 3d7010a194..992384da3e 100644 --- a/trie/bintrie/internal_node.go +++ b/trie/bintrie/internal_node.go @@ -29,7 +29,7 @@ func keyToPath(depth int, key []byte) ([]byte, error) { keyLen := min(len(key), 31) ba := new(BitArray).SetBytes(uint8(keyLen*8), key[:keyLen]) path := new(BitArray).MSBs(ba, uint8(depth+1)) - return path.ActiveBytes(), nil + return path.KeyBytes(), nil } // Invariant: dirty=false implies mustRecompute=false. 
Every mutation that diff --git a/trie/bintrie/iterator.go b/trie/bintrie/iterator.go index 88622ddaae..e678ee310d 100644 --- a/trie/bintrie/iterator.go +++ b/trie/bintrie/iterator.go @@ -201,7 +201,7 @@ func (it *binaryNodeIterator) Path() []byte { } path.AppendBit(&path, uint8(state.Index)) } - return path.ActiveBytes() + return path.KeyBytes() } func (it *binaryNodeIterator) NodeBlob() []byte { diff --git a/trie/bintrie/trie.go b/trie/bintrie/trie.go index eee84ad92a..653d419f44 100644 --- a/trie/bintrie/trie.go +++ b/trie/bintrie/trie.go @@ -321,8 +321,8 @@ func (t *BinaryTrie) Commit(_ bool) (common.Hash, *trienode.NodeSet) { var rootPath BitArray t.store.collectNodes(t.store.root, rootPath, func(path BitArray, hash common.Hash, serialized []byte) { - var buf [32]byte - pathBytes := path.PutActiveBytes(&buf) + var buf [33]byte + pathBytes := path.PutKeyBytes(buf[:]) nodeset.AddNode(pathBytes, trienode.NewNodeWithPrev(hash, serialized, t.tracer.Get(pathBytes))) }, t.groupDepth) return t.Hash(), nodeset