rlp, trie, triedb/pathdb: compress trienode history (#32913)
Some checks are pending
/ Linux Build (push) Waiting to run
/ Linux Build (arm) (push) Waiting to run
/ Keeper Build (push) Waiting to run
/ Windows Build (push) Waiting to run
/ Docker Image (push) Waiting to run

This pull request introduces a mechanism to compress trienode history by
storing only the node diffs between consecutive versions.

- For full nodes, only the modified children are recorded in the history.
- For short nodes, only the modified value is stored.

If the node type has changed, or if the node is newly created or
deleted, the entire node value is stored instead.

To mitigate the overhead of reassembling nodes from diffs during history
reads, checkpoints are introduced by periodically storing full node values.

The current checkpoint interval is set to every 16 mutations, though
this parameter may be made configurable in the future.
This commit is contained in:
rjl493456442 2026-01-08 21:58:02 +08:00 committed by GitHub
parent 52f998d5ec
commit f51870e40e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 967 additions and 0 deletions

View file

@ -152,6 +152,35 @@ func CountValues(b []byte) (int, error) {
return i, nil
}
// SplitListValues decodes the outer list header of the RLP blob b and returns
// every top-level element of that list in raw form, i.e. each returned slice
// still carries its own RLP tag. The second value returned by SplitList is
// discarded. An empty list yields a nil slice.
//
// An error is returned if b is not a list or if any element header cannot be
// read.
func SplitListValues(b []byte) ([][]byte, error) {
	payload, _, err := SplitList(b)
	if err != nil {
		return nil, err
	}
	var items [][]byte
	for rest := payload; len(rest) > 0; {
		_, tagLen, contentLen, err := readKind(rest)
		if err != nil {
			return nil, err
		}
		// An element spans its tag plus its content.
		end := tagLen + contentLen
		items = append(items, rest[:end])
		rest = rest[end:]
	}
	return items, nil
}
// MergeListValues takes a list of raw, already RLP-encoded elements and wraps
// them into a single RLP list. The elements are written verbatim between the
// list header markers; they are not validated or re-encoded.
//
// The returned error is always nil; it is kept in the signature for callers
// that treat the function symmetrically with SplitListValues.
func MergeListValues(elems [][]byte) ([]byte, error) {
	w := NewEncoderBuffer(nil)
	offset := w.List()
	for _, elem := range elems {
		w.Write(elem)
	}
	w.ListEnd(offset)

	// ToBytes copies the encoded output, so the internal buffer can be handed
	// back to the encoder pool right away instead of being left for the GC.
	// No destination writer is attached, so there is nothing for Flush to
	// write and its error result carries no information here.
	enc := w.ToBytes()
	w.Flush()
	return enc, nil
}
func readKind(buf []byte) (k Kind, tagsize, contentsize uint64, err error) {
if len(buf) == 0 {
return 0, 0, 0, io.ErrUnexpectedEOF

View file

@ -336,3 +336,269 @@ func TestBytesSize(t *testing.T) {
}
}
}
// TestSplitListValues checks that SplitListValues returns the raw top-level
// elements of a list and reports the expected error for malformed input.
func TestSplitListValues(t *testing.T) {
	type testcase struct {
		name    string
		input   string   // hex-encoded RLP list
		want    []string // hex-encoded expected elements
		wantErr error
	}
	cases := []testcase{
		// Valid lists
		{name: "empty list", input: "C0", want: []string{}},
		{name: "single byte element", input: "C101", want: []string{"01"}},
		{name: "single empty string", input: "C180", want: []string{"80"}},
		{name: "two byte elements", input: "C20102", want: []string{"01", "02"}},
		{name: "three elements", input: "C3010203", want: []string{"01", "02", "03"}},
		{name: "mixed size elements", input: "C80182020283030303", want: []string{"01", "820202", "83030303"}},
		{name: "string elements", input: "C88363617483646F67", want: []string{"83636174", "83646F67"}}, // cat, dog
		{name: "nested list element", input: "C4C3010203", want: []string{"C3010203"}},                // [[1,2,3]] -> [1,2,3]
		{name: "multiple nested lists", input: "C6C20102C20304", want: []string{"C20102", "C20304"}},  // [[1,2],[3,4]]
		{name: "large list", input: "C6010203040506", want: []string{"01", "02", "03", "04", "05", "06"}},
		{name: "list with empty strings", input: "C3808080", want: []string{"80", "80", "80"}},

		// Error cases
		{name: "single byte", input: "01", wantErr: ErrExpectedList},
		{name: "string", input: "83636174", wantErr: ErrExpectedList},
		{name: "empty input", input: "", wantErr: io.ErrUnexpectedEOF},
		{name: "invalid list - value too large", input: "C60102030405", wantErr: ErrValueTooLarge},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			elems, err := SplitListValues(unhex(tc.input))
			if !errors.Is(err, tc.wantErr) {
				t.Fatalf("SplitListValues() error = %v, wantErr %v", err, tc.wantErr)
			}
			if err != nil {
				// The expected error was hit, nothing else to compare.
				return
			}
			if len(elems) != len(tc.want) {
				t.Fatalf("SplitListValues() got %d elements, want %d", len(elems), len(tc.want))
			}
			for i := range elems {
				expected := unhex(tc.want[i])
				if bytes.Equal(elems[i], expected) {
					continue
				}
				t.Errorf("SplitListValues() element[%d] = %x, want %x", i, elems[i], expected)
			}
		})
	}
}
// TestMergeListValues checks that MergeListValues wraps raw RLP elements into
// the expected list encoding.
func TestMergeListValues(t *testing.T) {
	type testcase struct {
		name    string
		elems   []string // hex-encoded RLP elements
		want    string   // hex-encoded expected result
		wantErr error
	}
	cases := []testcase{
		{name: "empty list", elems: []string{}, want: "C0"},
		{name: "single byte element", elems: []string{"01"}, want: "C101"},
		{name: "single empty string", elems: []string{"80"}, want: "C180"},
		{name: "two byte elements", elems: []string{"01", "02"}, want: "C20102"},
		{name: "three elements", elems: []string{"01", "02", "03"}, want: "C3010203"},
		{name: "mixed size elements", elems: []string{"01", "820202", "83030303"}, want: "C80182020283030303"},
		{name: "string elements", elems: []string{"83636174", "83646F67"}, want: "C88363617483646F67"}, // cat, dog
		{name: "nested list element", elems: []string{"C20102", "03"}, want: "C4C2010203"},            // [[1,2],3]
		{name: "multiple nested lists", elems: []string{"C20102", "C3030405"}, want: "C7C20102C3030405"}, // [[1,2],[3,4,5]]
		{name: "large list", elems: []string{"01", "02", "03", "04", "05", "06"}, want: "C6010203040506"},
		{name: "list with empty strings", elems: []string{"80", "80", "80"}, want: "C3808080"},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			raw := make([][]byte, 0, len(tc.elems))
			for _, h := range tc.elems {
				raw = append(raw, unhex(h))
			}
			merged, err := MergeListValues(raw)
			if !errors.Is(err, tc.wantErr) {
				t.Fatalf("MergeListValues() error = %v, wantErr %v", err, tc.wantErr)
			}
			if err != nil {
				return
			}
			if expected := unhex(tc.want); !bytes.Equal(merged, expected) {
				t.Errorf("MergeListValues() = %x, want %x", merged, expected)
			}
		})
	}
}
// TestSplitMergeList checks that splitting a list with SplitListValues and
// merging the result with MergeListValues reproduces the original encoding.
func TestSplitMergeList(t *testing.T) {
	cases := []struct {
		name  string
		input string // hex-encoded RLP list
	}{
		{name: "empty list", input: "C0"},
		{name: "single byte element", input: "C101"},
		{name: "two byte elements", input: "C20102"},
		{name: "three elements", input: "C3010203"},
		{name: "mixed size elements", input: "C80182020283030303"},
		{name: "string elements", input: "C88363617483646F67"},  // [cat, dog]
		{name: "nested list element", input: "C4C2010203"},      // [[1,2],3]
		{name: "multiple nested lists", input: "C6C20102C20304"}, // [[1,2],[3,4]]
		{name: "large list", input: "C6010203040506"},           // [1,2,3,4,5,6]
		{name: "list with empty strings", input: "C3808080"},    // ["", "", ""]
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			source := unhex(tc.input)

			// Take the list apart ...
			parts, err := SplitListValues(source)
			if err != nil {
				t.Fatalf("SplitListValues() error = %v", err)
			}
			// ... and put it back together.
			roundtrip, err := MergeListValues(parts)
			if err != nil {
				t.Fatalf("MergeListValues() error = %v", err)
			}
			// The round trip must be lossless.
			if !bytes.Equal(roundtrip, source) {
				t.Errorf("Round trip failed: original = %x, merged = %x", source, roundtrip)
			}
		})
	}
}

View file

@ -17,6 +17,7 @@
package trie
import (
"bytes"
"fmt"
"io"
"strings"
@ -242,6 +243,74 @@ func decodeRef(buf []byte) (node, []byte, error) {
}
}
// decodeNodeElements splits the RLP encoding of a trie node into its raw
// top-level elements.
//
// A well-formed full node yields 17 elements and a well-formed short node
// yields 2; the element count itself is not validated here. Empty input is
// rejected with io.ErrUnexpectedEOF.
func decodeNodeElements(buf []byte) ([][]byte, error) {
	if len(buf) > 0 {
		return rlp.SplitListValues(buf)
	}
	return nil, io.ErrUnexpectedEOF
}
// encodeNodeElements wraps the given raw node elements into an RLP list.
// Only element counts matching a short node (2) or a full node (17) are
// accepted; any other count results in an error.
func encodeNodeElements(elements [][]byte) ([]byte, error) {
	switch n := len(elements); n {
	case 2, 17:
		return rlp.MergeListValues(elements)
	default:
		return nil, fmt.Errorf("invalid number of elements: %d", n)
	}
}
// NodeDifference compares two RLP-encoded nodes element by element.
//
// It returns the number of elements in the node, the positions of the
// elements that differ, and the raw value each of those elements has in
// oldvalue. When nothing differs, the two returned slices are empty but
// non-nil.
//
// An error is returned if either blob is empty or cannot be split into
// elements, or if the two nodes have a different number of elements (i.e.
// the node type has changed).
func NodeDifference(oldvalue []byte, newvalue []byte) (int, []int, [][]byte, error) {
	prev, err := decodeNodeElements(oldvalue)
	if err != nil {
		return 0, nil, nil, err
	}
	next, err := decodeNodeElements(newvalue)
	if err != nil {
		return 0, nil, nil, err
	}
	count := len(prev)
	if count != len(next) {
		return 0, nil, nil, fmt.Errorf("different node type, old elements: %d, new elements: %d", count, len(next))
	}
	changed := make([]int, 0, count)
	originals := make([][]byte, 0, count)
	for i, elem := range prev {
		if bytes.Equal(elem, next[i]) {
			continue
		}
		// Record the position together with the value it had before.
		changed = append(changed, i)
		originals = append(originals, elem)
	}
	return count, changed, originals, nil
}
// ReassembleNode applies a series of mutation sets to an RLP-encoded node and
// returns the re-encoded result.
//
// mutations[i] holds the raw element values of the i-th mutation set and
// indices[i] holds the element positions they belong to; the two must have the
// same shape. The sets are applied in order, so a later set overrides an
// earlier one at the same position. If no mutation set is supplied, blob is
// returned unchanged.
//
// An error is returned if the shapes of mutations and indices do not match, if
// a position lies outside of the decoded node, or if the node cannot be
// decoded or re-encoded.
func ReassembleNode(blob []byte, mutations [][][]byte, indices [][]int) ([]byte, error) {
	if len(mutations) != len(indices) {
		return nil, fmt.Errorf("mismatched mutation sets, values: %d, indices: %d", len(mutations), len(indices))
	}
	if len(mutations) == 0 {
		return blob, nil
	}
	elements, err := decodeNodeElements(blob)
	if err != nil {
		return nil, err
	}
	for i, values := range mutations {
		positions := indices[i]
		if len(values) != len(positions) {
			return nil, fmt.Errorf("mismatched mutation set %d, values: %d, indices: %d", i, len(values), len(positions))
		}
		for j, pos := range positions {
			// Reject corrupted positions instead of panicking on them.
			if pos < 0 || pos >= len(elements) {
				return nil, fmt.Errorf("mutation index out of range, index: %d, elements: %d", pos, len(elements))
			}
			elements[pos] = values[j]
		}
	}
	return encodeNodeElements(elements)
}
// wraps a decoding error with information about the path to the
// invalid child node (for debugging encoding issues).
type decodeError struct {

View file

@ -18,9 +18,12 @@ package trie
import (
"bytes"
"math/rand"
"reflect"
"testing"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/internal/testrand"
"github.com/ethereum/go-ethereum/rlp"
)
@ -94,6 +97,286 @@ func TestDecodeFullNode(t *testing.T) {
}
}
// makeTestLeafNode returns the encoding of a leaf node with a random 10-byte
// key and a random value of 10 bytes (small) or 32 bytes (otherwise).
func makeTestLeafNode(small bool) []byte {
	valueSize := 32
	if small {
		valueSize = 10
	}
	var leaf leafNodeEncoder
	leaf.Key = hexToCompact(keybytesToHex(testrand.Bytes(10)))
	leaf.Val = testrand.Bytes(valueSize)

	w := rlp.NewEncoderBuffer(nil)
	leaf.encode(w)
	return w.ToBytes()
}
// makeTestFullNode returns the encoding of a full node whose 16 children are
// randomly left empty, set to a random 32-byte hash, or set to an embedded
// leaf node. The 17th slot always carries a random 32-byte value.
func makeTestFullNode(small bool) []byte {
	var full fullnodeEncoder
	for i := range 16 {
		kind := rand.Intn(3)
		if kind == 1 {
			// hash reference
			full.Children[i] = testrand.Bytes(32)
		} else if kind == 2 {
			// embedded node
			full.Children[i] = makeTestLeafNode(small)
		}
		// kind == 0: leave the child empty
	}
	full.Children[16] = testrand.Bytes(32) // value

	w := rlp.NewEncoderBuffer(nil)
	full.encode(w)
	return w.ToBytes()
}
// TestEncodeDecodeNodeElements checks that decoding a node into elements and
// encoding those elements again reproduces the original blob.
func TestEncodeDecodeNodeElements(t *testing.T) {
	nodes := [][]byte{
		makeTestFullNode(true),
		makeTestFullNode(false),
		makeTestLeafNode(true),
		makeTestLeafNode(false),
	}
	for i := range nodes {
		parts, err := decodeNodeElements(nodes[i])
		if err != nil {
			t.Fatalf("Failed to decode node elements: %v", err)
		}
		roundtrip, err := encodeNodeElements(parts)
		if err != nil {
			t.Fatalf("Failed to encode node elements: %v", err)
		}
		if !bytes.Equal(roundtrip, nodes[i]) {
			t.Fatalf("Unexpected encoded node element, want: %v, got: %v", nodes[i], roundtrip)
		}
	}
}
func makeTestLeafNodePair() ([]byte, []byte, [][]byte, []int) {
var (
na = leafNodeEncoder{}
nb = leafNodeEncoder{}
)
key := keybytesToHex(testrand.Bytes(10))
na.Key = hexToCompact(key)
nb.Key = hexToCompact(key)
valA := testrand.Bytes(32)
valB := testrand.Bytes(32)
na.Val = valA
nb.Val = valB
bufa, bufb := rlp.NewEncoderBuffer(nil), rlp.NewEncoderBuffer(nil)
na.encode(bufa)
nb.encode(bufb)
diff, _ := rlp.EncodeToBytes(valA)
return bufa.ToBytes(), bufb.ToBytes(), [][]byte{diff}, []int{1}
}
// makeTestFullNodePair builds two full nodes whose 16 children are randomly
// empty in both, identical in both, or different. It returns both encodings
// plus the expected diff: the raw element of the first node at every
// differing position, and those positions in ascending order. Both returned
// slices are nil if no child happens to differ.
func makeTestFullNodePair() ([]byte, []byte, [][]byte, []int) {
	var (
		oldNode   fullnodeEncoder
		newNode   fullnodeEncoder
		positions []int
		originals [][]byte
	)
	for i := range 16 {
		switch rand.Intn(3) {
		case 0:
			// empty in both nodes
		case 1:
			// identical child in both nodes
			var shared []byte
			if rand.Intn(2) == 0 {
				shared = testrand.Bytes(32) // hashnode
			} else {
				shared = makeTestLeafNode(true) // embedded node
			}
			oldNode.Children[i], newNode.Children[i] = shared, shared
		case 2:
			// different child: the old one is a hash, an embedded node or
			// empty, the new one is always a hash.
			var (
				oldChild []byte
				expected []byte
			)
			switch rand.Intn(3) {
			case 0:
				oldChild = testrand.Bytes(32) // hashnode
				expected, _ = rlp.EncodeToBytes(oldChild)
			case 1:
				oldChild = makeTestLeafNode(true) // embedded node
				expected = oldChild
			default:
				oldChild = nil
				expected = rlp.EmptyString
			}
			newChild := testrand.Bytes(32) // hashnode
			oldNode.Children[i] = oldChild
			newNode.Children[i] = newChild
			positions = append(positions, i)
			originals = append(originals, expected)
		}
	}
	oldNode.Children[16], newNode.Children[16] = nil, nil

	oldBuf := rlp.NewEncoderBuffer(nil)
	oldNode.encode(oldBuf)
	newBuf := rlp.NewEncoderBuffer(nil)
	newNode.encode(newBuf)
	return oldBuf.ToBytes(), newBuf.ToBytes(), originals, positions
}
// TestNodeDifference checks that NodeDifference rejects invalid or mismatched
// nodes and reports the expected positions and original values for randomly
// generated leaf and full node pairs.
func TestNodeDifference(t *testing.T) {
	type testsuite struct {
		old        []byte
		new        []byte
		expErr     bool
		expIndices []int
		expValues  [][]byte
	}
	// nonList returns 32 random bytes that can never be parsed as an RLP
	// list: clearing the top bit of the first byte keeps it below 0x80, which
	// RLP treats as a single byte value. Plain random bytes would
	// occasionally form a valid list and make the error cases flaky.
	nonList := func() []byte {
		blob := testrand.Bytes(32)
		blob[0] &= 0x7f
		return blob
	}
	var tests = []testsuite{
		// Invalid node data
		{old: nil, new: nil, expErr: true},
		{old: nonList(), new: nil, expErr: true},
		{old: nil, new: nonList(), expErr: true},
		{old: nonList(), new: nonList(), expErr: true},

		// Different node type
		{old: makeTestLeafNode(true), new: makeTestFullNode(true), expErr: true},
	}
	for range 10 {
		va, vb, elements, indices := makeTestLeafNodePair()
		tests = append(tests, testsuite{
			old:        va,
			new:        vb,
			expErr:     false,
			expIndices: indices,
			expValues:  elements,
		})
	}
	for range 10 {
		va, vb, elements, indices := makeTestFullNodePair()
		tests = append(tests, testsuite{
			old:        va,
			new:        vb,
			expErr:     false,
			expIndices: indices,
			expValues:  elements,
		})
	}
	for _, test := range tests {
		_, indices, values, err := NodeDifference(test.old, test.new)
		if test.expErr && err == nil {
			t.Fatal("Expect error, got nil")
		}
		if !test.expErr && err != nil {
			t.Fatalf("Unexpect error, %v", err)
		}
		if err != nil {
			continue
		}
		// A full node pair may have no differing child at all. In that case
		// the generator returns nil slices while NodeDifference returns empty
		// non-nil ones, which reflect.DeepEqual treats as different. Compare
		// the lengths first so both representations of "nothing" match.
		if len(indices) != len(test.expIndices) || (len(indices) > 0 && !reflect.DeepEqual(indices, test.expIndices)) {
			t.Fatalf("Unexpected indices, want: %v, got: %v", test.expIndices, indices)
		}
		if len(values) != len(test.expValues) || (len(values) > 0 && !reflect.DeepEqual(values, test.expValues)) {
			t.Fatalf("Unexpected values, want: %v, got: %v", test.expValues, values)
		}
	}
}
// TestReassembleFullNode applies ten random mutation sets to a full node via
// ReassembleNode and checks the result against a node that received the same
// updates directly.
func TestReassembleFullNode(t *testing.T) {
	var node fullnodeEncoder
	for i := range 16 {
		if rand.Intn(2) != 0 {
			continue
		}
		node.Children[i] = testrand.Bytes(32)
	}
	w := rlp.NewEncoderBuffer(nil)
	node.encode(w)
	base := w.ToBytes()

	// Generate a list of diffs, mirroring every change on the node itself.
	var (
		allValues  [][][]byte
		allIndices [][]int
	)
	for round := 0; round < 10; round++ {
		var (
			seen      = make(map[int]struct{})
			positions []int
			changes   [][]byte
		)
		for attempt := 0; attempt < 3; attempt++ {
			p := rand.Intn(16)
			if _, dup := seen[p]; dup {
				// Each position is touched at most once per round.
				continue
			}
			seen[p] = struct{}{}

			hash := testrand.Bytes(32)
			encoded, _ := rlp.EncodeToBytes(hash)
			positions = append(positions, p)
			changes = append(changes, encoded)
			node.Children[p] = hash
		}
		allValues = append(allValues, changes)
		allIndices = append(allIndices, positions)
	}
	got, err := ReassembleNode(base, allValues, allIndices)
	if err != nil {
		t.Fatalf("Failed to re-assemble full node %v", err)
	}
	w2 := rlp.NewEncoderBuffer(nil)
	node.encode(w2)
	want := w2.ToBytes()
	if !reflect.DeepEqual(want, got) {
		t.Fatalf("Unexpeted reassembled node")
	}
}
// TestReassembleShortNode applies ten successive value mutations to a leaf
// node via ReassembleNode and checks the result against a leaf that carries
// the last value directly.
func TestReassembleShortNode(t *testing.T) {
	var ln leafNodeEncoder
	ln.Key = hexToCompact(keybytesToHex(testrand.Bytes(10)))
	ln.Val = testrand.Bytes(10)
	buf := rlp.NewEncoderBuffer(nil)
	ln.encode(buf)
	enc := buf.ToBytes()

	// Generate a list of diffs, each one replacing the value at position 1.
	var (
		values  [][][]byte
		indices [][]int
	)
	for i := 0; i < 10; i++ {
		val := testrand.Bytes(10)
		ln.Val = val
		diff, _ := rlp.EncodeToBytes(val)
		values = append(values, [][]byte{diff})
		indices = append(indices, []int{1})
	}
	reassembled, err := ReassembleNode(enc, values, indices)
	if err != nil {
		// This test exercises a short node, say so in the failure output.
		t.Fatalf("Failed to re-assemble short node %v", err)
	}
	buf2 := rlp.NewEncoderBuffer(nil)
	ln.encode(buf2)
	enc2 := buf2.ToBytes()
	if !reflect.DeepEqual(enc2, reassembled) {
		t.Fatalf("Unexpected reassembled node, want: %x, got: %x", enc2, reassembled)
	}
}
// goos: darwin
// goarch: arm64
// pkg: github.com/ethereum/go-ethereum/trie

View file

@ -14,12 +14,14 @@
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
// nolint:unused
package pathdb
import (
"bytes"
"errors"
"fmt"
"hash/fnv"
"io"
"maps"
@ -30,6 +32,7 @@ import (
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/trie"
"github.com/ethereum/go-ethereum/trie/trienode"
)
@ -424,3 +427,272 @@ func (s *nodeSetWithOrigin) decode(r *rlp.Stream) error {
s.computeSize()
return nil
}
// encodeNodeCompressed serialises the difference of a trie node between two
// consecutive versions. The output is laid out as:
//
//	metadata (1 byte) | extended bitmap (2 bytes, optional) | elements
//
// Metadata byte, counted from the least significant bit:
//
//   - bit 0 (flagA): always set, marks the value as compressed
//   - bit 1 (flagB): set if no extended bitmap follows the metadata byte
//   - bit 2 (flagC): embedded bitmap, position 0 (only used when flagB == 1)
//   - bit 3 (flagD): embedded bitmap, position 1 (only used when flagB == 1)
//   - bit 4 (flagE): marks the presence of the 16th child of a full node
//     (only used when flagB == 0)
//   - bit 5-7: reserved
//
// If flagB is 1 the node refers to a shortNode: flagC tells whether the key
// is recorded and flagD tells whether the value is recorded.
//
// If flagB is 0 the node refers to a fullNode: the metadata byte is followed
// by a two byte bitmap in which bit (pos % 8) of byte (pos / 8) tells whether
// the child at pos has been recorded.
//
// Example: 0b_0000_1011
//
//   - bit0=1, bit1=1 -> compressed format, no extended bitmap
//   - bit2=0, bit3=1 -> the key of the short node is not stored, its value is
//
// The elements section is the concatenation of the original values of the
// modified elements, each one prefixed with its size in a single byte. The
// size is written with a plain byte conversion, so the elements are expected
// to be at most 255 bytes long.
func encodeNodeCompressed(addExtension bool, elements [][]byte, indices []int) []byte {
	// Work out the exact output size up front so the buffer never grows:
	// metadata byte, optional bitmap, and a size byte per element.
	total := 1
	if addExtension {
		total += 2
	}
	for i := range elements {
		total += 1 + len(elements[i])
	}
	var (
		out    = make([]byte, 0, total)
		header = byte(1) // The compression format indicator
	)
	if addExtension {
		// Extended bitmap, covering at most 16 children.
		var bitmap [2]byte
		for _, pos := range indices {
			// Children[16] is only theoretically possible in the
			// Merkle-Patricia-trie, in practice this field is never used in
			// the Ethereum case. If it occurs, mark it with the otherwise
			// reserved flagE instead of the bitmap.
			if pos >= 16 {
				log.Warn("Unexpected 16th child encountered in a full node")
				header |= 1 << 4
				continue
			}
			bitmap[pos/8] |= 1 << uint(pos%8)
		}
		out = append(out, header, bitmap[0], bitmap[1])
	} else {
		// Embedded bitmap, stored right inside the metadata byte.
		header |= 2
		for _, pos := range indices {
			header |= 1 << (pos + 2)
		}
		out = append(out, header)
	}
	for _, elem := range elements {
		out = append(out, byte(len(elem))) // 1 byte is sufficient for element size
		out = append(out, elem...)
	}
	return out
}
// encodeNodeFull wraps the complete value of a trie node into the history
// format for uncompressed nodes:
//
//   - metadata byte (1 byte): 0b0
//   - node value, copied verbatim
func encodeNodeFull(value []byte) []byte {
	// The leading zero byte doubles as the "not compressed" marker.
	enc := make([]byte, 1, len(value)+1)
	return append(enc, value...)
}
// decodeNodeCompressed is the inverse of encodeNodeCompressed: it parses a
// compressed node history element and returns the recorded elements together
// with the positions they belong to.
//
// The input is required to carry the compression marker in its metadata byte;
// full-value elements are rejected. An error is also returned if the data is
// truncated or if bytes are left over once all elements have been read.
func decodeNodeCompressed(data []byte) ([][]byte, []int, error) {
	if len(data) == 0 {
		return nil, nil, errors.New("invalid data: too short")
	}
	header := data[0]
	if header&1 == 0 {
		return nil, nil, errors.New("invalid data: full node value")
	}
	// Reconstruct the positions, either from the bits embedded in the
	// metadata byte or from the two byte bitmap following it.
	var (
		indices []int
		rest    []byte
	)
	if header&2 != 0 {
		if header&(1<<2) != 0 { // flagC
			indices = append(indices, 0)
		}
		if header&(1<<3) != 0 { // flagD
			indices = append(indices, 1)
		}
		rest = data[1:]
	} else {
		if len(data) < 3 {
			return nil, nil, errors.New("invalid data: too short")
		}
		for pos := 0; pos < 16; pos++ {
			if data[1+pos/8]&(1<<uint(pos%8)) != 0 {
				indices = append(indices, pos)
			}
		}
		if header&(1<<4) != 0 { // flagE
			indices = append(indices, 16)
			log.Info("Unexpected 16th child encountered in a full node")
		}
		rest = data[3:]
	}
	// Reconstruct the elements, one size-prefixed entry per position.
	elements := make([][]byte, 0, len(indices))
	for range indices {
		if len(rest) == 0 {
			return nil, nil, errors.New("invalid data: missing size byte")
		}
		size := int(rest[0])
		rest = rest[1:]

		if size > len(rest) {
			return nil, nil, fmt.Errorf("invalid data: expected %d bytes, got %d", size, len(rest))
		}
		if size == 0 {
			elements = append(elements, nil)
			// The zero-size element is practically unexpected, for node
			// deletion the rlp.EmptyString is still expected. Log loudly for
			// the potential programming error.
			log.Error("Empty element from compressed node, please open an issue", "raw", rest)
			continue
		}
		// Copy the element so the result does not alias the input.
		elements = append(elements, append(make([]byte, 0, size), rest[:size]...))
		rest = rest[size:]
	}
	// Everything must have been consumed by now.
	if len(rest) != 0 {
		return nil, nil, errors.New("invalid data: trailing bytes")
	}
	return elements, indices, nil
}
// decodeNodeFull strips the metadata byte from a full-value node history
// element and returns the remaining bytes, which alias the input. Empty input
// and input whose metadata byte is non-zero are rejected.
func decodeNodeFull(data []byte) ([]byte, error) {
	switch {
	case len(data) == 0:
		return nil, errors.New("invalid data: too short")
	case data[0] != 0:
		return nil, errors.New("invalid data: compressed node value")
	default:
		return data[1:], nil
	}
}
// encodeFullFrequency specifies the frequency (1/16) for encoding node in
// full format. It is used by encodeNodeHistory as the modulus of a 32-bit
// FNV-1a hash over the state root, the trie owner and the node path: a node
// is stored in full whenever that hash is divisible by this value.
//
// TODO(rjl493456442) make it configurable.
const encodeFullFrequency = 16
// encodeNodeHistory encodes the history of all nodes tracked in the set.
// Typically, the original values of dirty nodes serve as the history, but
// this can lead to significant storage overhead.
//
// For full nodes, which often see only a few modified children during state
// transitions, recording the entire child set (up to 16 children at 32 bytes
// each) is inefficient. For short nodes, which often see only the value
// modified during the state transition, recording the key part is also
// unnecessary. To reduce the size, the diff of the node is recorded instead
// of the full value wherever possible.
//
// However, recovering a node from a series of diffs requires applying
// multiple history records, which is computationally and IO intensive. To
// mitigate this, the full value of a node is periodically recorded as a
// checkpoint. The frequency of these checkpoints is a tradeoff between the
// compression rate and the read overhead.
//
// The result maps the trie owner to the node path to the encoded history
// element. An error is returned if a node with an original value has no
// counterpart in the node set.
func (s *nodeSetWithOrigin) encodeNodeHistory(root common.Hash) (map[common.Hash]map[string][]byte, error) {
	var (
		// the set of all encoded node history elements
		nodes = make(map[common.Hash]map[string][]byte, len(s.nodeOrigin))

		// encodeFullValue determines whether a node should be encoded
		// in full format with a pseudo-random probabilistic algorithm.
		encodeFullValue = func(owner common.Hash, path string) bool {
			// For trie nodes at the first two levels of the account trie, it is very
			// likely that all children are modified within a single state transition.
			// In such cases, do not use diff mode.
			if owner == (common.Hash{}) && len(path) < 2 {
				return true
			}
			h := fnv.New32a()
			h.Write(root.Bytes())
			h.Write(owner.Bytes())
			h.Write([]byte(path))
			return h.Sum32()%uint32(encodeFullFrequency) == 0
		}
		// compressible reports whether every diff element fits the one byte
		// size prefix used by the compressed format. Encoding a longer element
		// would silently truncate its size and corrupt the history element.
		compressible = func(diffs [][]byte) bool {
			for _, diff := range diffs {
				if len(diff) > 0xff {
					return false
				}
			}
			return true
		}
	)
	for owner, origins := range s.nodeOrigin {
		var posts map[string]*trienode.Node
		if owner == (common.Hash{}) {
			posts = s.nodeSet.accountNodes
		} else {
			posts = s.nodeSet.storageNodes[owner]
		}
		encoded := make(map[string][]byte, len(origins))
		nodes[owner] = encoded

		for path, oldvalue := range origins {
			n, exists := posts[path]
			if !exists {
				// something not expected
				return nil, fmt.Errorf("node with origin is not found, %x-%v", owner, []byte(path))
			}
			if !encodeFullValue(owner, path) {
				// Partial encoding is requested, try to find the node diffs
				// and fall back to the full-value encoding if that fails.
				//
				// The partial encoding is not possible in these cases:
				// - the node is deleted or was non-existent;
				// - the node type has been changed (e.g. from short to full);
				// - a diff element is too large for the compressed format.
				nElem, indices, diffs, err := trie.NodeDifference(oldvalue, n.Blob)
				if err == nil && compressible(diffs) {
					// A node with anything but two elements is treated as a
					// full node and gets the extended bitmap.
					encoded[path] = encodeNodeCompressed(nElem != 2, diffs, indices)
					continue
				}
			}
			// Encode the entire original value as the history element
			encoded[path] = encodeNodeFull(oldvalue)
		}
	}
	return nodes, nil
}

View file

@ -18,11 +18,13 @@ package pathdb
import (
"bytes"
"math/rand"
"reflect"
"testing"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/internal/testrand"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/trie/trienode"
)
@ -126,3 +128,49 @@ func TestNodeSetWithOriginEncode(t *testing.T) {
t.Fatalf("Unexpected data size, got: %d, want: %d", dec2.size, s.size)
}
}
func TestEncodeFullNodeCompressed(t *testing.T) {
var (
elements [][]byte
indices []int
)
for i := 0; i <= 16; i++ {
if rand.Intn(2) == 0 {
elements = append(elements, testrand.Bytes(20))
indices = append(indices, i)
}
}
enc := encodeNodeCompressed(true, elements, indices)
decElements, decIndices, err := decodeNodeCompressed(enc)
if err != nil {
t.Fatalf("Failed to decode node compressed, %v", err)
}
if !reflect.DeepEqual(elements, decElements) {
t.Fatalf("Elements are not matched")
}
if !reflect.DeepEqual(indices, decIndices) {
t.Fatalf("Indices are not matched")
}
}
func TestEncodeShortNodeCompressed(t *testing.T) {
var (
elements [][]byte
indices []int
)
for i := 0; i < 2; i++ {
elements = append(elements, testrand.Bytes(20))
indices = append(indices, i)
}
enc := encodeNodeCompressed(false, elements, indices)
decElements, decIndices, err := decodeNodeCompressed(enc)
if err != nil {
t.Fatalf("Failed to decode node compressed, %v", err)
}
if !reflect.DeepEqual(elements, decElements) {
t.Fatalf("Elements are not matched")
}
if !reflect.DeepEqual(indices, decIndices) {
t.Fatalf("Indices are not matched")
}
}