go-ethereum/trie/node.go
rjl493456442 f51870e40e
Some checks are pending
/ Linux Build (push) Waiting to run
/ Linux Build (arm) (push) Waiting to run
/ Keeper Build (push) Waiting to run
/ Windows Build (push) Waiting to run
/ Docker Image (push) Waiting to run
rlp, trie, triedb/pathdb: compress trienode history (#32913)
This pull request introduces a mechanism to compress trienode history by
storing only the node diffs between consecutive versions.

- For full nodes, only the modified children are recorded in the history;
- For short nodes, only the modified value is stored;

If the node type has changed, or if the node is newly created or
deleted, the entire node value is stored instead.

To mitigate the overhead of reassembling nodes from diffs during history
reads, checkpoints are introduced by periodically storing full node values.

The current checkpoint interval is set to every 16 mutations, though
this parameter may be made configurable in the future.
2026-01-08 21:58:02 +08:00

334 lines
9.7 KiB
Go

// Copyright 2014 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package trie
import (
"bytes"
"fmt"
"io"
"strings"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/rlp"
)
var indices = []string{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f", "[17]"}
type node interface {
cache() (hashNode, bool)
encode(w rlp.EncoderBuffer)
fstring(string) string
}
type (
fullNode struct {
Children [17]node // Actual trie node data to encode/decode (needs custom encoder)
flags nodeFlag
}
shortNode struct {
Key []byte
Val node
flags nodeFlag
}
hashNode []byte
valueNode []byte
// fullnodeEncoder is a type used exclusively for encoding fullNode.
// Briefly instantiating a fullnodeEncoder and initializing with
// existing slices is less memory intense than using the fullNode type.
fullnodeEncoder struct {
Children [17][]byte
}
// extNodeEncoder is a type used exclusively for encoding extension node.
// Briefly instantiating a extNodeEncoder and initializing with existing
// slices is less memory intense than using the shortNode type.
extNodeEncoder struct {
Key []byte
Val []byte
}
// leafNodeEncoder is a type used exclusively for encoding leaf node.
leafNodeEncoder struct {
Key []byte
Val []byte
}
)
// EncodeRLP encodes a full node into the consensus RLP format.
func (n *fullNode) EncodeRLP(w io.Writer) error {
eb := rlp.NewEncoderBuffer(w)
n.encode(eb)
return eb.Flush()
}
// nodeFlag contains caching-related metadata about a node.
type nodeFlag struct {
hash hashNode // cached hash of the node (may be nil)
dirty bool // whether the node has changes that must be written to the database
}
func (n nodeFlag) copy() nodeFlag {
return nodeFlag{
hash: common.CopyBytes(n.hash),
dirty: n.dirty,
}
}
func (n *fullNode) cache() (hashNode, bool) { return n.flags.hash, n.flags.dirty }
func (n *shortNode) cache() (hashNode, bool) { return n.flags.hash, n.flags.dirty }
func (n hashNode) cache() (hashNode, bool) { return nil, true }
func (n valueNode) cache() (hashNode, bool) { return nil, true }
// Pretty printing.
func (n *fullNode) String() string { return n.fstring("") }
func (n *shortNode) String() string { return n.fstring("") }
func (n hashNode) String() string { return n.fstring("") }
func (n valueNode) String() string { return n.fstring("") }
func (n *fullNode) fstring(ind string) string {
resp := fmt.Sprintf("[\n%s ", ind)
for i, node := range &n.Children {
if node == nil {
resp += fmt.Sprintf("%s: <nil> ", indices[i])
} else {
resp += fmt.Sprintf("%s: %v", indices[i], node.fstring(ind+" "))
}
}
return resp + fmt.Sprintf("\n%s] ", ind)
}
func (n *shortNode) fstring(ind string) string {
return fmt.Sprintf("{%x: %v} ", n.Key, n.Val.fstring(ind+" "))
}
func (n hashNode) fstring(ind string) string {
return fmt.Sprintf("<%x> ", []byte(n))
}
func (n valueNode) fstring(ind string) string {
return fmt.Sprintf("%x ", []byte(n))
}
// mustDecodeNode is a wrapper of decodeNode and panic if any error is encountered.
func mustDecodeNode(hash, buf []byte) node {
n, err := decodeNode(hash, buf)
if err != nil {
panic(fmt.Sprintf("node %x: %v", hash, err))
}
return n
}
// mustDecodeNodeUnsafe is a wrapper of decodeNodeUnsafe and panic if any error is
// encountered.
func mustDecodeNodeUnsafe(hash, buf []byte) node {
n, err := decodeNodeUnsafe(hash, buf)
if err != nil {
panic(fmt.Sprintf("node %x: %v", hash, err))
}
return n
}
// decodeNode parses the RLP encoding of a trie node. It will deep-copy the passed
// byte slice for decoding, so it's safe to modify the byte slice afterwards. The-
// decode performance of this function is not optimal, but it is suitable for most
// scenarios with low performance requirements and hard to determine whether the
// byte slice be modified or not.
func decodeNode(hash, buf []byte) (node, error) {
return decodeNodeUnsafe(hash, common.CopyBytes(buf))
}
// decodeNodeUnsafe parses the RLP encoding of a trie node. The passed byte slice
// will be directly referenced by node without bytes deep copy, so the input MUST
// not be changed after.
func decodeNodeUnsafe(hash, buf []byte) (node, error) {
if len(buf) == 0 {
return nil, io.ErrUnexpectedEOF
}
elems, _, err := rlp.SplitList(buf)
if err != nil {
return nil, fmt.Errorf("decode error: %v", err)
}
switch c, _ := rlp.CountValues(elems); c {
case 2:
n, err := decodeShort(hash, elems)
return n, wrapError(err, "short")
case 17:
n, err := decodeFull(hash, elems)
return n, wrapError(err, "full")
default:
return nil, fmt.Errorf("invalid number of list elements: %v", c)
}
}
func decodeShort(hash, elems []byte) (node, error) {
kbuf, rest, err := rlp.SplitString(elems)
if err != nil {
return nil, err
}
flag := nodeFlag{hash: hash}
key := compactToHex(kbuf)
if hasTerm(key) {
// value node
val, _, err := rlp.SplitString(rest)
if err != nil {
return nil, fmt.Errorf("invalid value node: %v", err)
}
return &shortNode{key, valueNode(val), flag}, nil
}
r, _, err := decodeRef(rest)
if err != nil {
return nil, wrapError(err, "val")
}
return &shortNode{key, r, flag}, nil
}
func decodeFull(hash, elems []byte) (*fullNode, error) {
n := &fullNode{flags: nodeFlag{hash: hash}}
for i := 0; i < 16; i++ {
cld, rest, err := decodeRef(elems)
if err != nil {
return n, wrapError(err, fmt.Sprintf("[%d]", i))
}
n.Children[i], elems = cld, rest
}
val, _, err := rlp.SplitString(elems)
if err != nil {
return n, err
}
if len(val) > 0 {
n.Children[16] = valueNode(val)
}
return n, nil
}
const hashLen = len(common.Hash{})
func decodeRef(buf []byte) (node, []byte, error) {
kind, val, rest, err := rlp.Split(buf)
if err != nil {
return nil, buf, err
}
switch {
case kind == rlp.List:
// 'embedded' node reference. The encoding must be smaller
// than a hash in order to be valid.
if size := len(buf) - len(rest); size > hashLen {
err := fmt.Errorf("oversized embedded node (size is %d bytes, want size < %d)", size, hashLen)
return nil, buf, err
}
// The buffer content has already been copied or is safe to use;
// no additional copy is required.
n, err := decodeNodeUnsafe(nil, buf)
return n, rest, err
case kind == rlp.String && len(val) == 0:
// empty node
return nil, rest, nil
case kind == rlp.String && len(val) == 32:
return hashNode(val), rest, nil
default:
return nil, nil, fmt.Errorf("invalid RLP string size %d (want 0 or 32)", len(val))
}
}
// decodeNodeElements parses the RLP encoding of a trie node and returns all the
// elements in raw byte format.
//
// For full node, it returns a slice of 17 elements;
// For short node, it returns a slice of 2 elements;
func decodeNodeElements(buf []byte) ([][]byte, error) {
if len(buf) == 0 {
return nil, io.ErrUnexpectedEOF
}
return rlp.SplitListValues(buf)
}
// encodeNodeElements encodes the provided node elements into a rlp list.
func encodeNodeElements(elements [][]byte) ([]byte, error) {
if len(elements) != 2 && len(elements) != 17 {
return nil, fmt.Errorf("invalid number of elements: %d", len(elements))
}
return rlp.MergeListValues(elements)
}
// NodeDifference accepts two RLP-encoding nodes and figures out the difference
// between them.
//
// An error is returned if any of the provided blob is nil, or the type of nodes
// are different.
func NodeDifference(oldvalue []byte, newvalue []byte) (int, []int, [][]byte, error) {
oldElems, err := decodeNodeElements(oldvalue)
if err != nil {
return 0, nil, nil, err
}
newElems, err := decodeNodeElements(newvalue)
if err != nil {
return 0, nil, nil, err
}
if len(oldElems) != len(newElems) {
return 0, nil, nil, fmt.Errorf("different node type, old elements: %d, new elements: %d", len(oldElems), len(newElems))
}
var (
indices = make([]int, 0, len(oldElems))
diff = make([][]byte, 0, len(oldElems))
)
for i := 0; i < len(oldElems); i++ {
if !bytes.Equal(oldElems[i], newElems[i]) {
indices = append(indices, i)
diff = append(diff, oldElems[i])
}
}
return len(oldElems), indices, diff, nil
}
// ReassembleNode accepts a RLP-encoding node along with a set of mutations,
// applying the modification diffs according to the indices and re-assemble.
func ReassembleNode(blob []byte, mutations [][][]byte, indices [][]int) ([]byte, error) {
if len(mutations) == 0 && len(indices) == 0 {
return blob, nil
}
elements, err := decodeNodeElements(blob)
if err != nil {
return nil, err
}
for i := 0; i < len(mutations); i++ {
for j, pos := range indices[i] {
elements[pos] = mutations[i][j]
}
}
return encodeNodeElements(elements)
}
// wraps a decoding error with information about the path to the
// invalid child node (for debugging encoding issues).
type decodeError struct {
what error
stack []string
}
func wrapError(err error, ctx string) error {
if err == nil {
return nil
}
if decErr, ok := err.(*decodeError); ok {
decErr.stack = append(decErr.stack, ctx)
return decErr
}
return &decodeError{err, []string{ctx}}
}
func (err *decodeError) Error() string {
return fmt.Sprintf("%v (decode path: %s)", err.what, strings.Join(err.stack, "<-"))
}