rlp, trie, triedb/pathdb: compress trienode history (#32913)

This pull request introduces a mechanism to compress trienode history by storing only the node diffs between consecutive versions: for full nodes, only the modified children are recorded in the history; for short nodes, only the modified value is stored. If the node type has changed, or if the node is newly created or deleted, the entire node value is stored instead. To mitigate the overhead of reassembling nodes from diffs during history reads, checkpoints are introduced by periodically storing full node values. Currently a full node value is stored on average once every 16 mutations; this parameter may be made configurable in the future.
2026-02-26 07:37:20 +00:00 · 2026-01-08 21:58:02 +08:00 · 2026-01-08 21:58:02 +08:00 · f51870e40e
commit f51870e40e
parent 52f998d5ec
6 changed files with 967 additions and 0 deletions
--- a/rlp/raw.go
+++ b/rlp/raw.go
@ -152,6 +152,35 @@ func CountValues(b []byte) (int, error) {
 	return i, nil
 }

+// SplitListValues strips the header of an RLP-encoded list and returns each of
+// its top-level items as a raw slice, encoding prefix included. The items are
+// sub-slices of the list content returned by SplitList; no copy is made here.
+//
+// Any error reported by SplitList or readKind is returned as is.
+func SplitListValues(b []byte) ([][]byte, error) {
+	payload, _, err := SplitList(b)
+	if err != nil {
+		return nil, err
+	}
+	var items [][]byte
+	for rest := payload; len(rest) > 0; {
+		_, tagsize, size, err := readKind(rest)
+		if err != nil {
+			return nil, err
+		}
+		// An item spans its type tag plus its content.
+		end := tagsize + size
+		items = append(items, rest[:end])
+		rest = rest[end:]
+	}
+	return items, nil
+}
+
+// MergeListValues wraps the given raw items into a single RLP list. The items
+// are written back to back, unmodified, as the payload of the list, so each of
+// them is expected to be a complete RLP encoding already; nothing is validated
+// here. The returned error is always nil.
+func MergeListValues(elems [][]byte) ([]byte, error) {
+	buf := NewEncoderBuffer(nil)
+	list := buf.List()
+	for i := range elems {
+		buf.Write(elems[i])
+	}
+	buf.ListEnd(list)
+	return buf.ToBytes(), nil
+}
+
 func readKind(buf []byte) (k Kind, tagsize, contentsize uint64, err error) {
 	if len(buf) == 0 {
 		return 0, 0, 0, io.ErrUnexpectedEOF
--- a/rlp/raw_test.go
+++ b/rlp/raw_test.go
@ -336,3 +336,269 @@ func TestBytesSize(t *testing.T) {
 		}
 	}
 }
+
+// TestSplitListValues runs SplitListValues over a table of hex-encoded inputs,
+// covering well-formed lists as well as inputs that are expected to fail.
+func TestSplitListValues(t *testing.T) {
+	type testcase struct {
+		name    string
+		input   string   // hex-encoded RLP list
+		want    []string // hex-encoded expected elements
+		wantErr error
+	}
+	cases := []testcase{
+		{name: "empty list", input: "C0", want: []string{}},
+		{name: "single byte element", input: "C101", want: []string{"01"}},
+		{name: "single empty string", input: "C180", want: []string{"80"}},
+		{name: "two byte elements", input: "C20102", want: []string{"01", "02"}},
+		{name: "three elements", input: "C3010203", want: []string{"01", "02", "03"}},
+		{name: "mixed size elements", input: "C80182020283030303", want: []string{"01", "820202", "83030303"}},
+
+		// [cat, dog]
+		{name: "string elements", input: "C88363617483646F67", want: []string{"83636174", "83646F67"}},
+
+		// [[1,2,3]]
+		{name: "nested list element", input: "C4C3010203", want: []string{"C3010203"}},
+
+		// [[1,2],[3,4]]
+		{name: "multiple nested lists", input: "C6C20102C20304", want: []string{"C20102", "C20304"}},
+
+		{name: "large list", input: "C6010203040506", want: []string{"01", "02", "03", "04", "05", "06"}},
+		{name: "list with empty strings", input: "C3808080", want: []string{"80", "80", "80"}},
+
+		// Error cases
+		{name: "single byte", input: "01", wantErr: ErrExpectedList},
+		{name: "string", input: "83636174", wantErr: ErrExpectedList},
+		{name: "empty input", input: "", wantErr: io.ErrUnexpectedEOF},
+		{name: "invalid list - value too large", input: "C60102030405", wantErr: ErrValueTooLarge},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got, err := SplitListValues(unhex(tc.input))
+			switch {
+			case !errors.Is(err, tc.wantErr):
+				t.Errorf("SplitListValues() error = %v, wantErr %v", err, tc.wantErr)
+				return
+			case err != nil:
+				// The expected failure occurred, nothing left to compare.
+				return
+			case len(got) != len(tc.want):
+				t.Errorf("SplitListValues() got %d elements, want %d", len(got), len(tc.want))
+				return
+			}
+			for i := range got {
+				want := unhex(tc.want[i])
+				if elem := got[i]; !bytes.Equal(elem, want) {
+					t.Errorf("SplitListValues() element[%d] = %x, want %x", i, elem, want)
+				}
+			}
+		})
+	}
+}
+
+// TestMergeListValues feeds hex-encoded RLP items to MergeListValues and checks
+// the resulting list encoding against the expected hex string.
+func TestMergeListValues(t *testing.T) {
+	type testcase struct {
+		name    string
+		elems   []string // hex-encoded RLP elements
+		want    string   // hex-encoded expected result
+		wantErr error
+	}
+	cases := []testcase{
+		{name: "empty list", elems: []string{}, want: "C0"},
+		{name: "single byte element", elems: []string{"01"}, want: "C101"},
+		{name: "single empty string", elems: []string{"80"}, want: "C180"},
+		{name: "two byte elements", elems: []string{"01", "02"}, want: "C20102"},
+		{name: "three elements", elems: []string{"01", "02", "03"}, want: "C3010203"},
+		{name: "mixed size elements", elems: []string{"01", "820202", "83030303"}, want: "C80182020283030303"},
+
+		// [cat, dog]
+		{name: "string elements", elems: []string{"83636174", "83646F67"}, want: "C88363617483646F67"},
+
+		// [[1,2],3]
+		{name: "nested list element", elems: []string{"C20102", "03"}, want: "C4C2010203"},
+
+		// [[1,2],[3,4,5]]
+		{name: "multiple nested lists", elems: []string{"C20102", "C3030405"}, want: "C7C20102C3030405"},
+
+		{name: "large list", elems: []string{"01", "02", "03", "04", "05", "06"}, want: "C6010203040506"},
+		{name: "list with empty strings", elems: []string{"80", "80", "80"}, want: "C3808080"},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			raw := make([][]byte, 0, len(tc.elems))
+			for _, s := range tc.elems {
+				raw = append(raw, unhex(s))
+			}
+			got, err := MergeListValues(raw)
+			switch {
+			case !errors.Is(err, tc.wantErr):
+				t.Errorf("MergeListValues() error = %v, wantErr %v", err, tc.wantErr)
+				return
+			case err != nil:
+				return
+			}
+			if want := unhex(tc.want); !bytes.Equal(got, want) {
+				t.Errorf("MergeListValues() = %x, want %x", got, want)
+			}
+		})
+	}
+}
+
+// TestSplitMergeList verifies that splitting an RLP list with SplitListValues
+// and merging the pieces with MergeListValues reproduces the original bytes.
+func TestSplitMergeList(t *testing.T) {
+	type testcase struct {
+		name  string
+		input string // hex-encoded RLP list
+	}
+	cases := []testcase{
+		{name: "empty list", input: "C0"},
+		{name: "single byte element", input: "C101"},
+		{name: "two byte elements", input: "C20102"},
+		{name: "three elements", input: "C3010203"},
+		{name: "mixed size elements", input: "C80182020283030303"},
+
+		// [cat, dog]
+		{name: "string elements", input: "C88363617483646F67"},
+
+		// [[1,2],3]
+		{name: "nested list element", input: "C4C2010203"},
+
+		// [[1,2],[3,4]]
+		{name: "multiple nested lists", input: "C6C20102C20304"},
+
+		// [1,2,3,4,5,6]
+		{name: "large list", input: "C6010203040506"},
+
+		// ["", "", ""]
+		{name: "list with empty strings", input: "C3808080"},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			original := unhex(tc.input)
+
+			// Take the list apart ...
+			parts, err := SplitListValues(original)
+			if err != nil {
+				t.Fatalf("SplitListValues() error = %v", err)
+			}
+			// ... and put it back together.
+			merged, err := MergeListValues(parts)
+			if err != nil {
+				t.Fatalf("MergeListValues() error = %v", err)
+			}
+			// The round trip must be lossless.
+			if !bytes.Equal(merged, original) {
+				t.Errorf("Round trip failed: original = %x, merged = %x", original, merged)
+			}
+		})
+	}
+}
--- a/trie/node.go
+++ b/trie/node.go
@ -17,6 +17,7 @@
 package trie

 import (
+	"bytes"
 	"fmt"
 	"io"
 	"strings"
@ -242,6 +243,74 @@ func decodeRef(buf []byte) (node, []byte, error) {
 	}
 }

+// decodeNodeElements splits the RLP encoding of a trie node into its raw
+// top-level elements without interpreting them.
+//
+// A full node is expected to yield 17 elements and a short node 2; the count
+// itself is not verified here. An empty buffer is rejected with
+// io.ErrUnexpectedEOF.
+func decodeNodeElements(buf []byte) ([][]byte, error) {
+	if len(buf) != 0 {
+		return rlp.SplitListValues(buf)
+	}
+	return nil, io.ErrUnexpectedEOF
+}
+
+// encodeNodeElements packs raw node elements back into an RLP list. Only the
+// element counts of a short node (2) and a full node (17) are accepted; any
+// other count results in an error.
+func encodeNodeElements(elements [][]byte) ([]byte, error) {
+	switch n := len(elements); n {
+	case 2, 17:
+		return rlp.MergeListValues(elements)
+	default:
+		return nil, fmt.Errorf("invalid number of elements: %d", n)
+	}
+}
+
+// NodeDifference decodes two RLP-encoded nodes and reports which of their
+// elements differ.
+//
+// It returns the number of elements per node, the positions at which the two
+// nodes disagree (in ascending order) and, for each of those positions, the
+// element taken from oldvalue.
+//
+// An error is returned if either blob is empty or cannot be split into
+// elements, or if the two nodes have a different number of elements (which is
+// treated as a different node type).
+func NodeDifference(oldvalue []byte, newvalue []byte) (int, []int, [][]byte, error) {
+	prev, err := decodeNodeElements(oldvalue)
+	if err != nil {
+		return 0, nil, nil, err
+	}
+	next, err := decodeNodeElements(newvalue)
+	if err != nil {
+		return 0, nil, nil, err
+	}
+	count := len(prev)
+	if count != len(next) {
+		return 0, nil, nil, fmt.Errorf("different node type, old elements: %d, new elements: %d", count, len(next))
+	}
+	var (
+		positions = make([]int, 0, count)
+		originals = make([][]byte, 0, count)
+	)
+	for pos, elem := range prev {
+		if bytes.Equal(elem, next[pos]) {
+			continue
+		}
+		positions = append(positions, pos)
+		originals = append(originals, elem)
+	}
+	return count, positions, originals, nil
+}
+
+// ReassembleNode accepts a RLP-encoding node along with a series of mutation
+// rounds and re-assembles the node with all of them applied.
+//
+// mutations[i] holds the raw (RLP-encoded) replacement elements of round i and
+// indices[i] the element positions they belong to, so both slices must have
+// the same length, as must every mutations[i]/indices[i] pair. Rounds are
+// applied in order; a later round overwrites an earlier one at the same
+// position.
+//
+// If no rounds are supplied the blob is returned untouched. An error is
+// returned if the blob cannot be decoded, if the shapes of mutations and
+// indices disagree, if a position lies outside of the node, or if the node
+// does not have the element count of a short or full node.
+func ReassembleNode(blob []byte, mutations [][][]byte, indices [][]int) ([]byte, error) {
+	if len(mutations) != len(indices) {
+		return nil, fmt.Errorf("mismatched mutation rounds, values: %d, indices: %d", len(mutations), len(indices))
+	}
+	if len(mutations) == 0 {
+		return blob, nil
+	}
+	elements, err := decodeNodeElements(blob)
+	if err != nil {
+		return nil, err
+	}
+	for i, positions := range indices {
+		values := mutations[i]
+		if len(values) != len(positions) {
+			return nil, fmt.Errorf("mismatched mutation round %d, values: %d, indices: %d", i, len(values), len(positions))
+		}
+		for j, pos := range positions {
+			// Reject positions outside of the node instead of panicking
+			// on corrupted or mismatched history data.
+			if pos < 0 || pos >= len(elements) {
+				return nil, fmt.Errorf("element index out of range, index: %d, elements: %d", pos, len(elements))
+			}
+			elements[pos] = values[j]
+		}
+	}
+	return encodeNodeElements(elements)
+}
+
 // wraps a decoding error with information about the path to the
 // invalid child node (for debugging encoding issues).
 type decodeError struct {
--- a/trie/node_test.go
+++ b/trie/node_test.go
@ -18,9 +18,12 @@ package trie

 import (
 	"bytes"
+	"math/rand"
+	"reflect"
 	"testing"

 	"github.com/ethereum/go-ethereum/crypto"
+	"github.com/ethereum/go-ethereum/internal/testrand"
 	"github.com/ethereum/go-ethereum/rlp"
 )

@ -94,6 +97,286 @@ func TestDecodeFullNode(t *testing.T) {
 	}
 }

+// makeTestLeafNode returns the encoding of a leaf node with a random key
+// derived from 10 random bytes and a random value, which is 10 bytes long if
+// small is set and 32 bytes long otherwise.
+func makeTestLeafNode(small bool) []byte {
+	size := 32
+	if small {
+		size = 10
+	}
+	leaf := leafNodeEncoder{
+		Key: hexToCompact(keybytesToHex(testrand.Bytes(10))),
+		Val: testrand.Bytes(size),
+	}
+	w := rlp.NewEncoderBuffer(nil)
+	leaf.encode(w)
+	return w.ToBytes()
+}
+
+// makeTestFullNode returns the encoding of a full node in which each of the 16
+// child slots is randomly left empty, filled with 32 random bytes (a hash
+// reference) or filled with an embedded leaf node built by makeTestLeafNode.
+// The value slot (index 16) always holds 32 random bytes.
+func makeTestFullNode(small bool) []byte {
+	var fn fullnodeEncoder
+	for i := 0; i < 16; i++ {
+		if kind := rand.Intn(3); kind == 1 {
+			fn.Children[i] = testrand.Bytes(32) // hash reference
+		} else if kind == 2 {
+			fn.Children[i] = makeTestLeafNode(small) // embedded node
+		}
+		// kind == 0: the slot stays empty
+	}
+	fn.Children[16] = testrand.Bytes(32) // value
+
+	w := rlp.NewEncoderBuffer(nil)
+	fn.encode(w)
+	return w.ToBytes()
+}
+
+// TestEncodeDecodeNodeElements checks that decoding a node into its elements
+// and encoding those elements again yields the original node encoding, for
+// both full nodes and leaf nodes.
+func TestEncodeDecodeNodeElements(t *testing.T) {
+	nodes := [][]byte{
+		makeTestFullNode(true),
+		makeTestFullNode(false),
+		makeTestLeafNode(true),
+		makeTestLeafNode(false),
+	}
+	for i := range nodes {
+		blob := nodes[i]
+
+		elements, err := decodeNodeElements(blob)
+		if err != nil {
+			t.Fatalf("Failed to decode node elements: %v", err)
+		}
+		enc, err := encodeNodeElements(elements)
+		if err != nil {
+			t.Fatalf("Failed to encode node elements: %v", err)
+		}
+		if !bytes.Equal(enc, blob) {
+			t.Fatalf("Unexpected encoded node element, want: %v, got: %v", blob, enc)
+		}
+	}
+}
+
+// makeTestLeafNodePair builds two leaf nodes that share the same random key
+// but carry independently generated random 32-byte values. It returns both
+// encodings followed by the expected diff between them: the RLP-encoded value
+// of the first node, located at element index 1.
+func makeTestLeafNodePair() ([]byte, []byte, [][]byte, []int) {
+	var (
+		hexKey = keybytesToHex(testrand.Bytes(10))
+		valA   = testrand.Bytes(32)
+		valB   = testrand.Bytes(32)
+	)
+	// encode produces the leaf node encoding for the shared key and val.
+	encode := func(val []byte) []byte {
+		leaf := leafNodeEncoder{Key: hexToCompact(hexKey), Val: val}
+		w := rlp.NewEncoderBuffer(nil)
+		leaf.encode(w)
+		return w.ToBytes()
+	}
+	encA, encB := encode(valA), encode(valB)
+
+	diff, _ := rlp.EncodeToBytes(valA)
+	return encA, encB, [][]byte{diff}, []int{1}
+}
+
+// makeTestFullNodePair builds two full nodes whose 16 child slots are, at
+// random, both empty, both identical, or different. It returns both encodings
+// followed by the expected diff: for every differing slot the raw element of
+// the first node, plus the list of those slot positions in ascending order.
+// The value slot (index 16) is left empty in both nodes.
+func makeTestFullNodePair() ([]byte, []byte, [][]byte, []int) {
+	var (
+		nodeA, nodeB fullnodeEncoder
+		positions    []int
+		diffs        [][]byte
+	)
+	for i := 0; i < 16; i++ {
+		switch rand.Intn(3) {
+		case 0:
+			// Leave the slot empty in both nodes.
+		case 1:
+			// Put the very same child into both nodes.
+			var shared []byte
+			if rand.Intn(2) == 0 {
+				shared = testrand.Bytes(32) // hashnode
+			} else {
+				shared = makeTestLeafNode(true) // embedded node
+			}
+			nodeA.Children[i], nodeB.Children[i] = shared, shared
+		case 2:
+			// Put different children into the nodes and remember the raw
+			// element of the first node as the expected diff.
+			var before, diff []byte
+			switch rand.Intn(3) {
+			case 0:
+				before = testrand.Bytes(32) // hashnode
+				diff, _ = rlp.EncodeToBytes(before)
+			case 1:
+				before = makeTestLeafNode(true) // embedded node
+				diff = before
+			default:
+				diff = rlp.EmptyString // empty slot
+			}
+			nodeA.Children[i] = before
+			nodeB.Children[i] = testrand.Bytes(32) // hashnode
+
+			positions = append(positions, i)
+			diffs = append(diffs, diff)
+		}
+	}
+	nodeA.Children[16], nodeB.Children[16] = nil, nil
+
+	wa, wb := rlp.NewEncoderBuffer(nil), rlp.NewEncoderBuffer(nil)
+	nodeA.encode(wa)
+	nodeB.encode(wb)
+	return wa.ToBytes(), wb.ToBytes(), diffs, positions
+}
+
+// TestNodeDifference checks NodeDifference against invalid inputs, nodes of
+// different types and randomly generated leaf/full node pairs for which the
+// expected diff is known.
+func TestNodeDifference(t *testing.T) {
+	type testsuite struct {
+		old        []byte
+		new        []byte
+		expErr     bool
+		expIndices []int
+		expValues  [][]byte
+	}
+	var tests = []testsuite{
+		// Invalid node data
+		{
+			old: nil, new: nil, expErr: true,
+		},
+		{
+			old: testrand.Bytes(32), new: nil, expErr: true,
+		},
+		{
+			old: nil, new: testrand.Bytes(32), expErr: true,
+		},
+		{
+			old: testrand.Bytes(32), new: testrand.Bytes(32), expErr: true,
+		},
+		// Different node type
+		{
+			old: makeTestLeafNode(true), new: makeTestFullNode(true), expErr: true,
+		},
+	}
+	for range 10 {
+		va, vb, elements, indices := makeTestLeafNodePair()
+		tests = append(tests, testsuite{
+			old:        va,
+			new:        vb,
+			expErr:     false,
+			expIndices: indices,
+			expValues:  elements,
+		})
+	}
+	for range 10 {
+		va, vb, elements, indices := makeTestFullNodePair()
+		tests = append(tests, testsuite{
+			old:        va,
+			new:        vb,
+			expErr:     false,
+			expIndices: indices,
+			expValues:  elements,
+		})
+	}
+
+	for _, test := range tests {
+		_, indices, values, err := NodeDifference(test.old, test.new)
+		if test.expErr && err == nil {
+			t.Fatal("Expect error, got nil")
+		}
+		if !test.expErr && err != nil {
+			t.Fatalf("Unexpected error, %v", err)
+		}
+		if err != nil {
+			continue
+		}
+		// A randomly generated pair may have no differing element at all. The
+		// expectation is a nil slice then while NodeDifference returns an
+		// empty one, which reflect.DeepEqual reports as different. Compare the
+		// lengths first and only deep-compare non-empty results.
+		if len(indices) != len(test.expIndices) || (len(indices) > 0 && !reflect.DeepEqual(indices, test.expIndices)) {
+			t.Fatalf("Unexpected indices, want: %v, got: %v", test.expIndices, indices)
+		}
+		if len(values) != len(test.expValues) || (len(values) > 0 && !reflect.DeepEqual(values, test.expValues)) {
+			t.Fatalf("Unexpected values, want: %v, got: %v", test.expValues, values)
+		}
+	}
+}
+
+// TestReassembleFullNode applies ten rounds of random child replacements to a
+// random full node, both directly on the node encoder and via ReassembleNode
+// on the original encoding, and expects the two results to be identical.
+func TestReassembleFullNode(t *testing.T) {
+	var fn fullnodeEncoder
+	for i := 0; i < 16; i++ {
+		if rand.Intn(2) == 0 {
+			fn.Children[i] = testrand.Bytes(32)
+		}
+	}
+	buf := rlp.NewEncoderBuffer(nil)
+	fn.encode(buf)
+	enc := buf.ToBytes()
+
+	// Generate a list of diffs
+	var (
+		values  [][][]byte
+		indices [][]int
+	)
+	for i := 0; i < 10; i++ {
+		var (
+			pos       = make(map[int]struct{})
+			poslist   []int
+			valuelist [][]byte
+		)
+		for j := 0; j < 3; j++ {
+			// Skip positions which were already mutated within this round,
+			// a round carries at most one value per position.
+			p := rand.Intn(16)
+			if _, ok := pos[p]; ok {
+				continue
+			}
+			pos[p] = struct{}{}
+
+			nh := testrand.Bytes(32)
+			diff, _ := rlp.EncodeToBytes(nh)
+			poslist = append(poslist, p)
+			valuelist = append(valuelist, diff)
+			fn.Children[p] = nh
+		}
+		values = append(values, valuelist)
+		indices = append(indices, poslist)
+	}
+	reassembled, err := ReassembleNode(enc, values, indices)
+	if err != nil {
+		t.Fatalf("Failed to re-assemble full node %v", err)
+	}
+	buf2 := rlp.NewEncoderBuffer(nil)
+	fn.encode(buf2)
+	enc2 := buf2.ToBytes()
+	if !reflect.DeepEqual(enc2, reassembled) {
+		t.Fatalf("Unexpected reassembled node, want: %x, got: %x", enc2, reassembled)
+	}
+}
+
+// TestReassembleShortNode applies ten consecutive value replacements to a
+// random leaf node, both directly on the node encoder and via ReassembleNode
+// on the original encoding, and expects the two results to be identical.
+func TestReassembleShortNode(t *testing.T) {
+	var ln leafNodeEncoder
+	ln.Key = hexToCompact(keybytesToHex(testrand.Bytes(10)))
+	ln.Val = testrand.Bytes(10)
+	buf := rlp.NewEncoderBuffer(nil)
+	ln.encode(buf)
+	enc := buf.ToBytes()
+
+	// Generate a list of diffs, each of them replaces the value (index 1)
+	var (
+		values  [][][]byte
+		indices [][]int
+	)
+	for i := 0; i < 10; i++ {
+		val := testrand.Bytes(10)
+		ln.Val = val
+		diff, _ := rlp.EncodeToBytes(val)
+		values = append(values, [][]byte{diff})
+		indices = append(indices, []int{1})
+	}
+	reassembled, err := ReassembleNode(enc, values, indices)
+	if err != nil {
+		t.Fatalf("Failed to re-assemble short node %v", err)
+	}
+	buf2 := rlp.NewEncoderBuffer(nil)
+	ln.encode(buf2)
+	enc2 := buf2.ToBytes()
+	if !reflect.DeepEqual(enc2, reassembled) {
+		t.Fatalf("Unexpected reassembled node, want: %x, got: %x", enc2, reassembled)
+	}
+}
+
 // goos: darwin
 // goarch: arm64
 // pkg: github.com/ethereum/go-ethereum/trie
--- a/triedb/pathdb/nodes.go
+++ b/triedb/pathdb/nodes.go
@ -14,12 +14,14 @@
 // You should have received a copy of the GNU Lesser General Public License
 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

+// nolint:unused
 package pathdb

 import (
 	"bytes"
 	"errors"
 	"fmt"
+	"hash/fnv"
 	"io"
 	"maps"

@ -30,6 +32,7 @@ import (
 	"github.com/ethereum/go-ethereum/ethdb"
 	"github.com/ethereum/go-ethereum/log"
 	"github.com/ethereum/go-ethereum/rlp"
+	"github.com/ethereum/go-ethereum/trie"
 	"github.com/ethereum/go-ethereum/trie/trienode"
 )

@ -424,3 +427,272 @@ func (s *nodeSetWithOrigin) decode(r *rlp.Stream) error {
 	s.computeSize()
 	return nil
 }
+
+// encodeNodeCompressed serializes the diff of a trie node, i.e. the original
+// values of the elements modified between two consecutive versions. The
+// produced byte stream is laid out as follows:
+//
+// - metadata byte layout (1 byte):
+//
+//	┌──── Bits (from MSB to LSB) ───┐
+//	│ 7 │ 6 │ 5 │ 4 │ 3 │ 2 │ 1 │ 0 │
+//	└───────────────────────────────┘
+//	  │   │   │   │   │   │   │   └─ FlagA: set if value is encoded in compressed format
+//	  │   │   │   │   │   │   └───── FlagB: set if no extended bitmap is present after the metadata byte
+//	  │   │   │   │   │   └───────── FlagC: bitmap for node (only used when flagB == 1)
+//	  │   │   │   │   └───────────── FlagD: bitmap for node (only used when flagB == 1)
+//	  │   │   │   └───────────────── FlagE: reserved (marks the presence of the 16th child in a full node)
+//	  │   │   └───────────────────── FlagF: reserved
+//	  │   └───────────────────────── FlagG: reserved
+//	  └───────────────────────────── FlagH: reserved
+//
+// Note:
+// - If flagB is 1, the node refers to a shortNode;
+//   - flagC indicates whether the key of the shortNode is recorded.
+//   - flagD indicates whether the value of the shortNode is recorded.
+//
+// - If flagB is 0, the node refers to a fullNode;
+//   - each bit in the extended bitmap indicates whether the corresponding
+//     child has been modified.
+//
+// Example:
+//
+// 0b_0000_1011
+//
+// Bit0=1, Bit1=1 -> node in compressed format, no extended bitmap
+// Bit2=0, Bit3=1 -> the key of a short node is not stored; its value is stored.
+//
+// - 2 bytes extended bitmap (only if the flagB in metadata is 0), each bit
+// represents a corresponding child;
+//
+// - concatenation of the original values of the modified elements, each one
+// preceded by its size in a single byte;
+//
+// addExtension selects the full node layout (extended bitmap); elements are
+// written in the given order and indices lists the positions they belong to.
+func encodeNodeCompressed(addExtension bool, elements [][]byte, indices []int) []byte {
+	// Work out the output size up front: the metadata byte, the optional
+	// extended bitmap and a size byte plus payload for every element.
+	total := 1
+	if addExtension {
+		total += 2
+	}
+	for i := range elements {
+		total += 1 + len(elements[i])
+	}
+	var (
+		out  = make([]byte, 0, total)
+		meta = byte(1) // The compression format indicator
+	)
+	if addExtension {
+		// Extended bitmap, one bit for each of the 16 regular children
+		var bitmap [2]byte
+		for _, pos := range indices {
+			// Children[16] is only theoretically possible in the Merkle-Patricia-trie,
+			// in practice this field is never used in the Ethereum case. If it occurs,
+			// use the FlagE for marking the presence.
+			if pos >= 16 {
+				log.Warn("Unexpected 16th child encountered in a full node")
+				meta |= 1 << 4 // Use the reserved flagE
+				continue
+			}
+			bitmap[pos/8] |= 1 << uint(pos%8)
+		}
+		out = append(out, meta, bitmap[0], bitmap[1])
+	} else {
+		// Embedded bitmap, kept in the metadata byte right above the
+		// two indicator bits.
+		meta |= 2 // The embedded bitmap indicator
+		for _, pos := range indices {
+			meta |= 1 << (pos + 2)
+		}
+		out = append(out, meta)
+	}
+	for _, elem := range elements {
+		out = append(out, byte(len(elem))) // 1 byte is sufficient for element size
+		out = append(out, elem...)
+	}
+	return out
+}
+
+// encodeNodeFull serializes a trie node in full-value format, which is simply
+// the node value prefixed with a metadata byte:
+//
+// - metadata byte layout (1 byte): 0b0
+// - node value
+func encodeNodeFull(value []byte) []byte {
+	// The single pre-set zero byte is the metadata, the value follows it.
+	return append(make([]byte, 1, len(value)+1), value...)
+}
+
+// decodeNodeCompressed parses a node diff in compressed format and returns the
+// recorded elements together with the positions they belong to, the latter in
+// ascending order.
+//
+// The input must carry the compressed format indicator in its metadata byte,
+// otherwise an error is returned. Errors are also returned for truncated input
+// and for bytes left over after the last element.
+func decodeNodeCompressed(data []byte) ([][]byte, []int, error) {
+	if len(data) == 0 {
+		return nil, nil, errors.New("invalid data: too short")
+	}
+	meta := data[0]
+	if meta&1 == 0 {
+		return nil, nil, errors.New("invalid data: full node value")
+	}
+	// Rebuild the positions from the bitmap, which lives either in the
+	// metadata byte itself or in the two bytes following it.
+	var indices []int
+	if meta&2 != 0 {
+		// Embedded bitmap: flagC and flagD stand for position 0 and 1
+		for pos := 0; pos < 2; pos++ {
+			if meta&(4<<pos) != 0 {
+				indices = append(indices, pos)
+			}
+		}
+		data = data[1:]
+	} else {
+		if len(data) < 3 {
+			return nil, nil, errors.New("invalid data: too short")
+		}
+		// Extended bitmap: the lowest bit of the first byte is position 0
+		bits := uint16(data[1]) | uint16(data[2])<<8
+		for pos := 0; pos < 16; pos++ {
+			if bits&(1<<pos) != 0 {
+				indices = append(indices, pos)
+			}
+		}
+		if meta&16 != 0 { // flagE
+			indices = append(indices, 16)
+			log.Info("Unexpected 16th child encountered in a full node")
+		}
+		data = data[3:]
+	}
+	// Read one size-prefixed element per position
+	elements := make([][]byte, 0, len(indices))
+	for range indices {
+		if len(data) == 0 {
+			return nil, nil, errors.New("invalid data: missing size byte")
+		}
+		size := int(data[0])
+		data = data[1:]
+
+		if size > len(data) {
+			return nil, nil, fmt.Errorf("invalid data: expected %d bytes, got %d", size, len(data))
+		}
+		if size == 0 {
+			elements = append(elements, nil)
+
+			// The zero-size element is practically unexpected, for node deletion
+			// the rlp.EmptyString is still expected. Log loudly for the potential
+			// programming error.
+			log.Error("Empty element from compressed node, please open an issue", "raw", data)
+			continue
+		}
+		// Copy the element out so that it doesn't alias the input
+		element := append(make([]byte, 0, size), data[:size]...)
+		elements = append(elements, element)
+		data = data[size:]
+	}
+	// Nothing may be left over once all elements are read
+	if len(data) != 0 {
+		return nil, nil, errors.New("invalid data: trailing bytes")
+	}
+	return elements, indices, nil
+}
+
+// decodeNodeFull parses a node in full-value format and returns the node value
+// following the metadata byte. The returned slice shares memory with data. An
+// error is returned if data is empty or if the metadata byte is not zero.
+func decodeNodeFull(data []byte) ([]byte, error) {
+	switch {
+	case len(data) == 0:
+		return nil, errors.New("invalid data: too short")
+	case data[0] != 0:
+		return nil, errors.New("invalid data: compressed node value")
+	default:
+		return data[1:], nil
+	}
+}
+
+// encodeFullFrequency specifies the frequency (1/16) for encoding node in
+// full format: it is the divisor of the pseudo-random selection, so roughly
+// one out of encodeFullFrequency node mutations is stored as a full value
+// rather than as a diff.
+// TODO(rjl493456442) making it configurable.
+const encodeFullFrequency = 16
+
+// encodeNodeHistory encodes the history of all nodes with a tracked origin.
+// Typically, the original values of dirty nodes serve as the history, but this
+// can lead to significant storage overhead.
+//
+// For full nodes, which often see only a few modified children during state
+// transitions, recording the entire child set (up to 16 children at 32 bytes
+// each) is inefficient. For short nodes, which often see only the value
+// modified during the state transition, recording the key part is also
+// unnecessary. To reduce the size, the diff of the node is recorded instead of
+// the full value wherever possible.
+//
+// However, recovering a node from a series of diffs requires applying multiple
+// history records, which is computationally and IO intensive. To mitigate this,
+// the full value of a node is periodically recorded as a checkpoint. The
+// frequency of these checkpoints is a tradeoff between the compression rate
+// and read overhead.
+//
+// The full value is also recorded whenever the diff cannot be represented in
+// the compressed format, see below.
+//
+// The result maps the trie owner and the node path to the encoded history
+// element. An error is returned if a node with an origin has no counterpart
+// in the node set.
+func (s *nodeSetWithOrigin) encodeNodeHistory(root common.Hash) (map[common.Hash]map[string][]byte, error) {
+	var (
+		// the set of all encoded node history elements
+		nodes = make(map[common.Hash]map[string][]byte)
+
+		// encodeFullValue determines whether a node should be encoded
+		// in full format with a pseudo-random probabilistic algorithm.
+		encodeFullValue = func(owner common.Hash, path string) bool {
+			// For trie nodes at the first two levels of the account trie, it is very
+			// likely that all children are modified within a single state transition.
+			// In such cases, do not use diff mode.
+			if owner == (common.Hash{}) && len(path) < 2 {
+				return true
+			}
+			h := fnv.New32a()
+			h.Write(root.Bytes())
+			h.Write(owner.Bytes())
+			h.Write([]byte(path))
+			return h.Sum32()%uint32(encodeFullFrequency) == 0
+		}
+
+		// compressible reports whether a node diff can be represented in the
+		// compressed format without losing information. The format supports
+		// short nodes (2 elements) and full nodes (17 elements) only, and it
+		// stores the size of each element in a single byte.
+		compressible = func(nElem int, diffs [][]byte) bool {
+			if nElem != 2 && nElem != 17 {
+				return false
+			}
+			for _, diff := range diffs {
+				if len(diff) > 0xff {
+					return false
+				}
+			}
+			return true
+		}
+	)
+	for owner, origins := range s.nodeOrigin {
+		var posts map[string]*trienode.Node
+		if owner == (common.Hash{}) {
+			posts = s.nodeSet.accountNodes
+		} else {
+			posts = s.nodeSet.storageNodes[owner]
+		}
+		nodes[owner] = make(map[string][]byte)
+
+		for path, oldvalue := range origins {
+			n, exists := posts[path]
+			if !exists {
+				// something not expected
+				return nil, fmt.Errorf("node with origin is not found, %x-%v", owner, []byte(path))
+			}
+			if !encodeFullValue(owner, path) {
+				// Partial encoding is required, try to find the node diffs and
+				// fallback to the full-value encoding if fails.
+				//
+				// The partial encoding will be failed in these certain cases:
+				// - the node is deleted or was not-existent;
+				// - the node type has been changed (e.g, from short to full)
+				// - the diff is not representable in the compressed format
+				nElem, indices, diffs, err := trie.NodeDifference(oldvalue, n.Blob)
+				if err == nil && compressible(nElem, diffs) {
+					// Encode the node difference as the history element, the
+					// extended bitmap is only needed by the full node.
+					nodes[owner][path] = encodeNodeCompressed(nElem != 2, diffs, indices)
+					continue
+				}
+			}
+			// Encode the entire original value as the history element
+			nodes[owner][path] = encodeNodeFull(oldvalue)
+		}
+	}
+	return nodes, nil
+}
--- a/triedb/pathdb/nodes_test.go
+++ b/triedb/pathdb/nodes_test.go
@ -18,11 +18,13 @@ package pathdb

 import (
 	"bytes"
+	"math/rand"
 	"reflect"
 	"testing"

 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/crypto"
+	"github.com/ethereum/go-ethereum/internal/testrand"
 	"github.com/ethereum/go-ethereum/rlp"
 	"github.com/ethereum/go-ethereum/trie/trienode"
 )
@ -126,3 +128,49 @@ func TestNodeSetWithOriginEncode(t *testing.T) {
 		t.Fatalf("Unexpected data size, got: %d, want: %d", dec2.size, s.size)
 	}
 }
+
+// TestEncodeFullNodeCompressed encodes a random selection of the 17 full node
+// elements in compressed format and expects the decoder to return the very
+// same elements and positions.
+func TestEncodeFullNodeCompressed(t *testing.T) {
+	var (
+		elements [][]byte
+		indices  []int
+	)
+	for i := 0; i <= 16; i++ {
+		if rand.Intn(2) == 0 {
+			elements = append(elements, testrand.Bytes(20))
+			indices = append(indices, i)
+		}
+	}
+	enc := encodeNodeCompressed(true, elements, indices)
+	decElements, decIndices, err := decodeNodeCompressed(enc)
+	if err != nil {
+		t.Fatalf("Failed to decode node compressed, %v", err)
+	}
+	// The random selection may be empty, in which case the inputs are nil
+	// slices while the decoder may return empty ones. reflect.DeepEqual
+	// reports those as different, so compare the lengths first and only
+	// deep-compare non-empty results.
+	if len(elements) != len(decElements) || (len(elements) > 0 && !reflect.DeepEqual(elements, decElements)) {
+		t.Fatalf("Elements are not matched")
+	}
+	if len(indices) != len(decIndices) || (len(indices) > 0 && !reflect.DeepEqual(indices, decIndices)) {
+		t.Fatalf("Indices are not matched")
+	}
+}
+
+// TestEncodeShortNodeCompressed encodes both elements of a short node (index 0
+// and index 1) in compressed format without the extended bitmap and expects
+// the decoder to return the very same elements and positions.
+func TestEncodeShortNodeCompressed(t *testing.T) {
+	var (
+		elements = [][]byte{testrand.Bytes(20), testrand.Bytes(20)}
+		indices  = []int{0, 1}
+	)
+	blob := encodeNodeCompressed(false, elements, indices)
+
+	gotElements, gotIndices, err := decodeNodeCompressed(blob)
+	if err != nil {
+		t.Fatalf("Failed to decode node compressed, %v", err)
+	}
+	switch {
+	case !reflect.DeepEqual(elements, gotElements):
+		t.Fatalf("Elements are not matched")
+	case !reflect.DeepEqual(indices, gotIndices):
+		t.Fatalf("Indices are not matched")
+	}
+}