diff --git a/rlp/raw.go b/rlp/raw.go index cec90346a1..114037df78 100644 --- a/rlp/raw.go +++ b/rlp/raw.go @@ -152,6 +152,35 @@ func CountValues(b []byte) (int, error) { return i, nil } +// SplitListValues extracts the raw elements from the list RLP-encoding blob. +func SplitListValues(b []byte) ([][]byte, error) { + b, _, err := SplitList(b) + if err != nil { + return nil, err + } + var elements [][]byte + for len(b) > 0 { + _, tagsize, size, err := readKind(b) + if err != nil { + return nil, err + } + elements = append(elements, b[:tagsize+size]) + b = b[tagsize+size:] + } + return elements, nil +} + +// MergeListValues takes a list of raw elements and rlp-encodes them as list. +func MergeListValues(elems [][]byte) ([]byte, error) { + w := NewEncoderBuffer(nil) + offset := w.List() + for _, elem := range elems { + w.Write(elem) + } + w.ListEnd(offset) + return w.ToBytes(), nil +} + func readKind(buf []byte) (k Kind, tagsize, contentsize uint64, err error) { if len(buf) == 0 { return 0, 0, 0, io.ErrUnexpectedEOF diff --git a/rlp/raw_test.go b/rlp/raw_test.go index 7b3255eca3..2ed77b384c 100644 --- a/rlp/raw_test.go +++ b/rlp/raw_test.go @@ -336,3 +336,269 @@ func TestBytesSize(t *testing.T) { } } } + +func TestSplitListValues(t *testing.T) { + tests := []struct { + name string + input string // hex-encoded RLP list + want []string // hex-encoded expected elements + wantErr error + }{ + { + name: "empty list", + input: "C0", + want: []string{}, + }, + { + name: "single byte element", + input: "C101", + want: []string{"01"}, + }, + { + name: "single empty string", + input: "C180", + want: []string{"80"}, + }, + { + name: "two byte elements", + input: "C20102", + want: []string{"01", "02"}, + }, + { + name: "three elements", + input: "C3010203", + want: []string{"01", "02", "03"}, + }, + { + name: "mixed size elements", + input: "C80182020283030303", + want: []string{"01", "820202", "83030303"}, + }, + { + name: "string elements", + input: "C88363617483646F67", + want: []string{"83636174", "83646F67"}, // cat,dog + }, + { + name: "nested list element", + input: "C4C3010203", // [[1,2,3]] + want: []string{"C3010203"}, // [1,2,3] + }, + { + name: "multiple nested lists", + input: "C6C20102C20304", // [[1,2],[3,4]] + want: []string{"C20102", "C20304"}, // [1,2], [3,4] + }, + { + name: "large list", + input: "C6010203040506", + want: []string{"01", "02", "03", "04", "05", "06"}, + }, + { + name: "list with empty strings", + input: "C3808080", + want: []string{"80", "80", "80"}, + }, + // Error cases + { + name: "single byte", + input: "01", + wantErr: ErrExpectedList, + }, + { + name: "string", + input: "83636174", + wantErr: ErrExpectedList, + }, + { + name: "empty input", + input: "", + wantErr: io.ErrUnexpectedEOF, + }, + { + name: "invalid list - value too large", + input: "C60102030405", + wantErr: ErrValueTooLarge, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := SplitListValues(unhex(tt.input)) + if !errors.Is(err, tt.wantErr) { + t.Errorf("SplitListValues() error = %v, wantErr %v", err, tt.wantErr) + return + } + if err != nil { + return + } + if len(got) != len(tt.want) { + t.Errorf("SplitListValues() got %d elements, want %d", len(got), len(tt.want)) + return + } + for i, elem := range got { + want := unhex(tt.want[i]) + if !bytes.Equal(elem, want) { + t.Errorf("SplitListValues() element[%d] = %x, want %x", i, elem, want) + } + } + }) + } +} + +func TestMergeListValues(t *testing.T) { + tests := []struct { + name string + elems []string // hex-encoded RLP elements + want string // hex-encoded expected result + wantErr error + }{ + { + name: "empty list", + elems: []string{}, + want: "C0", + }, + { + name: "single byte element", + elems: []string{"01"}, + want: "C101", + }, + { + name: "single empty string", + elems: []string{"80"}, + want: "C180", + }, + { + name: "two byte elements", + elems: []string{"01", "02"}, + want: "C20102", + }, + { + name: "three elements", + elems: []string{"01", "02", "03"}, + want: "C3010203", + }, + { + name: "mixed size elements", + elems: []string{"01", "820202", "83030303"}, + want: "C80182020283030303", + }, + { + name: "string elements", + elems: []string{"83636174", "83646F67"}, // cat, dog + want: "C88363617483646F67", + }, + { + name: "nested list element", + elems: []string{"C20102", "03"}, // [[1, 2], 3] + want: "C4C2010203", + }, + { + name: "multiple nested lists", + elems: []string{"C20102", "C3030405"}, // [[1,2],[3,4,5]], + want: "C7C20102C3030405", + }, + { + name: "large list", + elems: []string{"01", "02", "03", "04", "05", "06"}, + want: "C6010203040506", + }, + { + name: "list with empty strings", + elems: []string{"80", "80", "80"}, + want: "C3808080", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + elems := make([][]byte, len(tt.elems)) + for i, s := range tt.elems { + elems[i] = unhex(s) + } + got, err := MergeListValues(elems) + if !errors.Is(err, tt.wantErr) { + t.Errorf("MergeListValues() error = %v, wantErr %v", err, tt.wantErr) + return + } + if err != nil { + return + } + want := unhex(tt.want) + if !bytes.Equal(got, want) { + t.Errorf("MergeListValues() = %x, want %x", got, want) + } + }) + } +} + +func TestSplitMergeList(t *testing.T) { + tests := []struct { + name string + input string // hex-encoded RLP list + }{ + { + name: "empty list", + input: "C0", + }, + { + name: "single byte element", + input: "C101", + }, + { + name: "two byte elements", + input: "C20102", + }, + { + name: "three elements", + input: "C3010203", + }, + { + name: "mixed size elements", + input: "C80182020283030303", + }, + { + name: "string elements", + input: "C88363617483646F67", // [cat, dog] + }, + { + name: "nested list element", + input: "C4C2010203", // [[1,2],3] + }, + { + name: "multiple nested lists", + input: "C6C20102C20304", // [[1,2],[3,4]] + }, + { + name: "large list", + input: "C6010203040506", // [1,2,3,4,5,6] + }, + { + name: "list with empty strings", + input: "C3808080", // ["", "", ""] + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + original := unhex(tt.input) + + // Split the list + elements, err := SplitListValues(original) + if err != nil { + t.Fatalf("SplitListValues() error = %v", err) + } + + // Merge back + merged, err := MergeListValues(elements) + if err != nil { + t.Fatalf("MergeListValues() error = %v", err) + } + + // The merged result should match the original + if !bytes.Equal(merged, original) { + t.Errorf("Round trip failed: original = %x, merged = %x", original, merged) + } + }) + } +} diff --git a/trie/node.go b/trie/node.go index 74fac4fd4e..3f14f07d63 100644 --- a/trie/node.go +++ b/trie/node.go @@ -17,6 +17,7 @@ package trie import ( + "bytes" "fmt" "io" "strings" @@ -242,6 +243,74 @@ func decodeRef(buf []byte) (node, []byte, error) { } } +// decodeNodeElements parses the RLP encoding of a trie node and returns all the +// elements in raw byte format. +// +// For full node, it returns a slice of 17 elements; +// For short node, it returns a slice of 2 elements; +func decodeNodeElements(buf []byte) ([][]byte, error) { + if len(buf) == 0 { + return nil, io.ErrUnexpectedEOF + } + return rlp.SplitListValues(buf) +} + +// encodeNodeElements encodes the provided node elements into a rlp list. +func encodeNodeElements(elements [][]byte) ([]byte, error) { + if len(elements) != 2 && len(elements) != 17 { + return nil, fmt.Errorf("invalid number of elements: %d", len(elements)) + } + return rlp.MergeListValues(elements) +} + +// NodeDifference accepts two RLP-encoding nodes and figures out the difference +// between them. +// +// An error is returned if any of the provided blob is nil, or the type of nodes +// are different. +func NodeDifference(oldvalue []byte, newvalue []byte) (int, []int, [][]byte, error) { + oldElems, err := decodeNodeElements(oldvalue) + if err != nil { + return 0, nil, nil, err + } + newElems, err := decodeNodeElements(newvalue) + if err != nil { + return 0, nil, nil, err + } + if len(oldElems) != len(newElems) { + return 0, nil, nil, fmt.Errorf("different node type, old elements: %d, new elements: %d", len(oldElems), len(newElems)) + } + var ( + indices = make([]int, 0, len(oldElems)) + diff = make([][]byte, 0, len(oldElems)) + ) + for i := 0; i < len(oldElems); i++ { + if !bytes.Equal(oldElems[i], newElems[i]) { + indices = append(indices, i) + diff = append(diff, oldElems[i]) + } + } + return len(oldElems), indices, diff, nil +} + +// ReassembleNode accepts a RLP-encoding node along with a set of mutations, +// applying the modification diffs according to the indices and re-assemble. +func ReassembleNode(blob []byte, mutations [][][]byte, indices [][]int) ([]byte, error) { + if len(mutations) == 0 && len(indices) == 0 { + return blob, nil + } + elements, err := decodeNodeElements(blob) + if err != nil { + return nil, err + } + for i := 0; i < len(mutations); i++ { + for j, pos := range indices[i] { + elements[pos] = mutations[i][j] + } + } + return encodeNodeElements(elements) +} + // wraps a decoding error with information about the path to the // invalid child node (for debugging encoding issues). type decodeError struct { diff --git a/trie/node_test.go b/trie/node_test.go index 9b8b33748f..875f6e38dc 100644 --- a/trie/node_test.go +++ b/trie/node_test.go @@ -18,9 +18,12 @@ package trie import ( "bytes" + "math/rand" + "reflect" "testing" "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/internal/testrand" "github.com/ethereum/go-ethereum/rlp" ) @@ -94,6 +97,286 @@ func TestDecodeFullNode(t *testing.T) { } } +func makeTestLeafNode(small bool) []byte { + l := leafNodeEncoder{} + l.Key = hexToCompact(keybytesToHex(testrand.Bytes(10))) + if small { + l.Val = testrand.Bytes(10) + } else { + l.Val = testrand.Bytes(32) + } + buf := rlp.NewEncoderBuffer(nil) + l.encode(buf) + return buf.ToBytes() +} + +func makeTestFullNode(small bool) []byte { + n := fullnodeEncoder{} + for i := 0; i < 16; i++ { + switch rand.Intn(3) { + case 0: + // write nil + case 1: + // write hash + n.Children[i] = testrand.Bytes(32) + case 2: + // write embedded node + n.Children[i] = makeTestLeafNode(small) + } + } + n.Children[16] = testrand.Bytes(32) // value + buf := rlp.NewEncoderBuffer(nil) + n.encode(buf) + return buf.ToBytes() +} + +func TestEncodeDecodeNodeElements(t *testing.T) { + var nodes [][]byte + nodes = append(nodes, makeTestFullNode(true)) + nodes = append(nodes, makeTestFullNode(false)) + nodes = append(nodes, makeTestLeafNode(true)) + nodes = append(nodes, makeTestLeafNode(false)) + + for _, blob := range nodes { + elements, err := decodeNodeElements(blob) + if err != nil { + t.Fatalf("Failed to decode node elements: %v", err) + } + enc, err := encodeNodeElements(elements) + if err != nil { + t.Fatalf("Failed to encode node elements: %v", err) + } + if !bytes.Equal(enc, blob) { + t.Fatalf("Unexpected encoded node element, want: %v, got: %v", blob, enc) + } + } +} + +func makeTestLeafNodePair() ([]byte, []byte, [][]byte, []int) { + var ( + na = leafNodeEncoder{} + nb = leafNodeEncoder{} + ) + key := keybytesToHex(testrand.Bytes(10)) + na.Key = hexToCompact(key) + nb.Key = hexToCompact(key) + + valA := testrand.Bytes(32) + valB := testrand.Bytes(32) + na.Val = valA + nb.Val = valB + + bufa, bufb := rlp.NewEncoderBuffer(nil), rlp.NewEncoderBuffer(nil) + na.encode(bufa) + nb.encode(bufb) + diff, _ := rlp.EncodeToBytes(valA) + return bufa.ToBytes(), bufb.ToBytes(), [][]byte{diff}, []int{1} +} + +func makeTestFullNodePair() ([]byte, []byte, [][]byte, []int) { + var ( + na = fullnodeEncoder{} + nb = fullnodeEncoder{} + indices []int + values [][]byte + ) + for i := 0; i < 16; i++ { + switch rand.Intn(3) { + case 0: + // write nil + case 1: + // write same + var child []byte + if rand.Intn(2) == 0 { + child = testrand.Bytes(32) // hashnode + } else { + child = makeTestLeafNode(true) // embedded node + } + na.Children[i] = child + nb.Children[i] = child + case 2: + // write different + var ( + va []byte + diff []byte + ) + rnd := rand.Intn(3) + if rnd == 0 { + va = testrand.Bytes(32) // hashnode + diff, _ = rlp.EncodeToBytes(va) + } else if rnd == 1 { + va = makeTestLeafNode(true) // embedded node + diff = va + } else { + va = nil + diff = rlp.EmptyString + } + vb := testrand.Bytes(32) // hashnode + na.Children[i] = va + nb.Children[i] = vb + + indices = append(indices, i) + values = append(values, diff) + } + } + na.Children[16] = nil + nb.Children[16] = nil + + bufa, bufb := rlp.NewEncoderBuffer(nil), rlp.NewEncoderBuffer(nil) + na.encode(bufa) + nb.encode(bufb) + return bufa.ToBytes(), bufb.ToBytes(), values, indices +} + +func TestNodeDifference(t *testing.T) { + type testsuite struct { + old []byte + new []byte + expErr bool + expIndices []int + expValues [][]byte + } + var tests = []testsuite{ + // Invalid node data + { + old: nil, new: nil, expErr: true, + }, + { + old: testrand.Bytes(32), new: nil, expErr: true, + }, + { + old: nil, new: testrand.Bytes(32), expErr: true, + }, + { + old: testrand.Bytes(32), new: testrand.Bytes(32), expErr: true, + }, + // Different node type + { + old: makeTestLeafNode(true), new: makeTestFullNode(true), expErr: true, + }, + } + for range 10 { + va, vb, elements, indices := makeTestLeafNodePair() + tests = append(tests, testsuite{ + old: va, + new: vb, + expErr: false, + expIndices: indices, + expValues: elements, + }) + } + for range 10 { + va, vb, elements, indices := makeTestFullNodePair() + tests = append(tests, testsuite{ + old: va, + new: vb, + expErr: false, + expIndices: indices, + expValues: elements, + }) + } + + for _, test := range tests { + _, indices, values, err := NodeDifference(test.old, test.new) + if test.expErr && err == nil { + t.Fatal("Expect error, got nil") + } + if !test.expErr && err != nil { + t.Fatalf("Unexpect error, %v", err) + } + if err == nil { + if !reflect.DeepEqual(indices, test.expIndices) { + t.Fatalf("Unexpected indices, want: %v, got: %v", test.expIndices, indices) + } + if !reflect.DeepEqual(values, test.expValues) { + t.Fatalf("Unexpected values, want: %v, got: %v", test.expValues, values) + } + } + } +} + +func TestReassembleFullNode(t *testing.T) { + var fn fullnodeEncoder + for i := 0; i < 16; i++ { + if rand.Intn(2) == 0 { + fn.Children[i] = testrand.Bytes(32) + } + } + buf := rlp.NewEncoderBuffer(nil) + fn.encode(buf) + enc := buf.ToBytes() + + // Generate a list of diffs + var ( + values [][][]byte + indices [][]int + ) + for i := 0; i < 10; i++ { + var ( + pos = make(map[int]struct{}) + poslist []int + valuelist [][]byte + ) + for j := 0; j < 3; j++ { + p := rand.Intn(16) + if _, ok := pos[p]; ok { + continue + } + pos[p] = struct{}{} + + nh := testrand.Bytes(32) + diff, _ := rlp.EncodeToBytes(nh) + poslist = append(poslist, p) + valuelist = append(valuelist, diff) + fn.Children[p] = nh + } + values = append(values, valuelist) + indices = append(indices, poslist) + } + reassembled, err := ReassembleNode(enc, values, indices) + if err != nil { + t.Fatalf("Failed to re-assemble full node %v", err) + } + buf2 := rlp.NewEncoderBuffer(nil) + fn.encode(buf2) + enc2 := buf2.ToBytes() + if !reflect.DeepEqual(enc2, reassembled) { + t.Fatalf("Unexpeted reassembled node") + } +} + +func TestReassembleShortNode(t *testing.T) { + var ln leafNodeEncoder + ln.Key = hexToCompact(keybytesToHex(testrand.Bytes(10))) + ln.Val = testrand.Bytes(10) + buf := rlp.NewEncoderBuffer(nil) + ln.encode(buf) + enc := buf.ToBytes() + + // Generate a list of diffs + var ( + values [][][]byte + indices [][]int + ) + for i := 0; i < 10; i++ { + val := testrand.Bytes(10) + ln.Val = val + diff, _ := rlp.EncodeToBytes(val) + values = append(values, [][]byte{diff}) + indices = append(indices, []int{1}) + } + reassembled, err := ReassembleNode(enc, values, indices) + if err != nil { + t.Fatalf("Failed to re-assemble full node %v", err) + } + buf2 := rlp.NewEncoderBuffer(nil) + ln.encode(buf2) + enc2 := buf2.ToBytes() + if !reflect.DeepEqual(enc2, reassembled) { + t.Fatalf("Unexpeted reassembled node") + } +} + // goos: darwin // goarch: arm64 // pkg: github.com/ethereum/go-ethereum/trie diff --git a/triedb/pathdb/nodes.go b/triedb/pathdb/nodes.go index c6f9e7aece..4eede439e4 100644 --- a/triedb/pathdb/nodes.go +++ b/triedb/pathdb/nodes.go @@ -14,12 +14,14 @@ // You should have received a copy of the GNU Lesser General Public License // along with the go-ethereum library. If not, see . +// nolint:unused package pathdb import ( "bytes" "errors" "fmt" + "hash/fnv" "io" "maps" @@ -30,6 +32,7 @@ import ( "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" "github.com/ethereum/go-ethereum/trie/trienode" ) @@ -424,3 +427,272 @@ func (s *nodeSetWithOrigin) decode(r *rlp.Stream) error { s.computeSize() return nil } + +// encodeNodeCompressed encodes the trie node differences between two consecutive +// versions into byte stream. The format is as below: +// +// - metadata byte layout (1 byte): +// +// ┌──── Bits (from MSB to LSB) ───┐ +// │ 7 │ 6 │ 5 │ 4 │ 3 │ 2 │ 1 │ 0 │ +// └───────────────────────────────┘ +// │ │ │ │ │ │ │ └─ FlagA: set if value is encoded in compressed format +// │ │ │ │ │ │ └───── FlagB: set if no extended bitmap is present after the metadata byte +// │ │ │ │ │ └───────── FlagC: bitmap for node (only used when flagB == 1) +// │ │ │ │ └───────────── FlagD: bitmap for node (only used when flagB == 1) +// │ │ │ └───────────────── FlagE: reserved (marks the presence of the 16th child in a full node) +// │ │ └───────────────────── FlagF: reserved +// │ └───────────────────────── FlagG: reserved +// └───────────────────────────── FlagH: reserved +// +// Note: +// - If flagB is 1, the node refers to a shortNode; +// - flagC indicates whether the key of the shortNode is recorded. +// - flagD indicates whether the value of the shortNode is recorded. +// +// - If flagB is 0, the node refers to a fullNode; +// - each bit in extended bitmap indicates whether the corresponding +// child have been modified. +// +// Example: +// +// 0b_0000_1011 +// +// Bit0=1, Bit1=1 -> node in compressed format, no extended bitmap +// Bit2=0, Bit3=1 -> the key of a short node is not stored; its value is stored. +// +// - 2 bytes extended bitmap (only if the flagB in metadata is 0), each bit +// represents a corresponding child; +// +// - concatenation of original value of modified children along with its size; +func encodeNodeCompressed(addExtension bool, elements [][]byte, indices []int) []byte { + var ( + enc []byte + flag = byte(1) // The compression format indicator + ) + // Pre-allocate the byte slice for the node encoder + size := 1 + if addExtension { + size += 2 + } + for _, element := range elements { + size += len(element) + 1 + } + enc = make([]byte, 0, size) + + if !addExtension { + flag |= 2 // The embedded bitmap indicator + + // Embedded bitmap + for _, pos := range indices { + flag |= 1 << (pos + 2) + } + enc = append(enc, flag) + } else { + // Extended bitmap + bitmap := make([]byte, 2) // bitmaps for at most 16 children + for _, pos := range indices { + // Children[16] is only theoretically possible in the Merkle-Patricia-trie, + // in practice this field is never used in the Ethereum case. If it occurs, + // use the FlagE for marking the presence. + if pos >= 16 { + log.Warn("Unexpected 16th child encountered in a full node") + flag |= 1 << 4 // Use the reserved flagE + continue + } + bitIndex := uint(pos % 8) + bitmap[pos/8] |= 1 << bitIndex + } + enc = append(enc, flag) + enc = append(enc, bitmap...) + } + for _, element := range elements { + enc = append(enc, byte(len(element))) // 1 byte is sufficient for element size + enc = append(enc, element...) + } + return enc +} + +// encodeNodeFull encodes the full trie node value into byte stream. The format is +// as below: +// +// - metadata byte layout (1 byte): 0b0 +// - node value +func encodeNodeFull(value []byte) []byte { + enc := make([]byte, len(value)+1) + copy(enc[1:], value) + return enc +} + +// decodeNodeCompressed decodes the byte stream of compressed trie node +// back to the original elements and their indices. +// +// It assumes the byte stream contains a compressed format node. +func decodeNodeCompressed(data []byte) ([][]byte, []int, error) { + if len(data) < 1 { + return nil, nil, errors.New("invalid data: too short") + } + flag := data[0] + if flag&byte(1) == 0 { + return nil, nil, errors.New("invalid data: full node value") + } + noExtend := flag&byte(2) != 0 + + // Reconstruct indices from bitmap + var indices []int + if noExtend { + if flag&byte(4) != 0 { // flagC + indices = append(indices, 0) + } + if flag&byte(8) != 0 { // flagD + indices = append(indices, 1) + } + data = data[1:] + } else { + if len(data) < 3 { + return nil, nil, errors.New("invalid data: too short") + } + bitmap := data[1:3] + for index, b := range bitmap { + for bitIdx := 0; bitIdx < 8; bitIdx++ { + if b&(1<