mirror of
https://github.com/ethereum/go-ethereum.git
synced 2026-02-26 15:47:21 +00:00
This pull request removes the node copy operation to reduce memory allocation. Key Changes as below: **(a) Use `decodeNodeUnsafe` for decoding nodes retrieved from the trie node reader** In the current implementation of the MPT, once a trie node blob is retrieved, it is passed to `decodeNode` for decoding. However, `decodeNode` assumes the supplied byte slice might be mutated later, so it performs a deep copy internally before parsing the node. Given that the node reader is implemented by the path database and the hash database, both of which guarantee the immutability of the returned byte slice. By restricting the node reader interface to explicitly guarantee that the returned byte slice will not be modified, we can safely replace `decodeNode` with `decodeNodeUnsafe`. This eliminates the need for a redundant byte copy during each node resolution. **(b) Modify the trie in place** In the current implementation of the MPT, a copy of a trie node is created before any modifications are made. These modifications include: - Node resolution: Converting the value from a hash to the actual node. - Node hashing: Tagging the hash into its cache. - Node commit: Replacing the children with its hash. - Structural changes: For example, adding a new child to a fullNode or replacing a child of a shortNode. This mechanism ensures that modifications only affect the live tree, leaving all previously created copies unaffected. Unfortunately, this property leads to a huge memory allocation requirement. For example, if we want to modify the fullNode for n times, the node will be copied for n times. In this pull request, all the trie modifications are made in place. In order to make sure all previously created copies are unaffected, the `Copy` function now will deep-copy all the live nodes rather than the root node itself. With this change, while the `Copy` function becomes more expensive, it's totally acceptable as it's not a frequently used one. For the normal trie operations (Get, GetNode, Hash, Commit, Insert, Delete), the node copy is not required anymore.
269 lines
7.8 KiB
Go
269 lines
7.8 KiB
Go
// Copyright 2014 The go-ethereum Authors
|
|
// This file is part of the go-ethereum library.
|
|
//
|
|
// The go-ethereum library is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Lesser General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// The go-ethereum library is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Lesser General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
package trie
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"strings"
|
|
|
|
"github.com/ethereum/go-ethereum/common"
|
|
"github.com/ethereum/go-ethereum/rlp"
|
|
)
|
|
|
|
var indices = []string{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f", "[17]"}
|
|
|
|
type node interface {
|
|
cache() (hashNode, bool)
|
|
encode(w rlp.EncoderBuffer)
|
|
fstring(string) string
|
|
}
|
|
|
|
type (
|
|
fullNode struct {
|
|
Children [17]node // Actual trie node data to encode/decode (needs custom encoder)
|
|
flags nodeFlag
|
|
}
|
|
shortNode struct {
|
|
Key []byte
|
|
Val node
|
|
flags nodeFlag
|
|
}
|
|
hashNode []byte
|
|
valueNode []byte
|
|
|
|
// fullnodeEncoder is a type used exclusively for encoding fullNode.
|
|
// Briefly instantiating a fullnodeEncoder and initializing with
|
|
// existing slices is less memory intense than using the fullNode type.
|
|
fullnodeEncoder struct {
|
|
Children [17][]byte
|
|
}
|
|
|
|
// extNodeEncoder is a type used exclusively for encoding extension node.
|
|
// Briefly instantiating a extNodeEncoder and initializing with existing
|
|
// slices is less memory intense than using the shortNode type.
|
|
extNodeEncoder struct {
|
|
Key []byte
|
|
Val []byte
|
|
}
|
|
|
|
// leafNodeEncoder is a type used exclusively for encoding leaf node.
|
|
leafNodeEncoder struct {
|
|
Key []byte
|
|
Val []byte
|
|
}
|
|
)
|
|
|
|
// nilValueNode is used when collapsing internal trie nodes for hashing, since
|
|
// unset children need to serialize correctly.
|
|
var nilValueNode = valueNode(nil)
|
|
|
|
// EncodeRLP encodes a full node into the consensus RLP format.
|
|
func (n *fullNode) EncodeRLP(w io.Writer) error {
|
|
eb := rlp.NewEncoderBuffer(w)
|
|
n.encode(eb)
|
|
return eb.Flush()
|
|
}
|
|
|
|
// nodeFlag contains caching-related metadata about a node.
|
|
type nodeFlag struct {
|
|
hash hashNode // cached hash of the node (may be nil)
|
|
dirty bool // whether the node has changes that must be written to the database
|
|
}
|
|
|
|
func (n nodeFlag) copy() nodeFlag {
|
|
return nodeFlag{
|
|
hash: common.CopyBytes(n.hash),
|
|
dirty: n.dirty,
|
|
}
|
|
}
|
|
|
|
func (n *fullNode) cache() (hashNode, bool) { return n.flags.hash, n.flags.dirty }
|
|
func (n *shortNode) cache() (hashNode, bool) { return n.flags.hash, n.flags.dirty }
|
|
func (n hashNode) cache() (hashNode, bool) { return nil, true }
|
|
func (n valueNode) cache() (hashNode, bool) { return nil, true }
|
|
|
|
// Pretty printing.
|
|
func (n *fullNode) String() string { return n.fstring("") }
|
|
func (n *shortNode) String() string { return n.fstring("") }
|
|
func (n hashNode) String() string { return n.fstring("") }
|
|
func (n valueNode) String() string { return n.fstring("") }
|
|
|
|
func (n *fullNode) fstring(ind string) string {
|
|
resp := fmt.Sprintf("[\n%s ", ind)
|
|
for i, node := range &n.Children {
|
|
if node == nil {
|
|
resp += fmt.Sprintf("%s: <nil> ", indices[i])
|
|
} else {
|
|
resp += fmt.Sprintf("%s: %v", indices[i], node.fstring(ind+" "))
|
|
}
|
|
}
|
|
return resp + fmt.Sprintf("\n%s] ", ind)
|
|
}
|
|
|
|
func (n *shortNode) fstring(ind string) string {
|
|
return fmt.Sprintf("{%x: %v} ", n.Key, n.Val.fstring(ind+" "))
|
|
}
|
|
func (n hashNode) fstring(ind string) string {
|
|
return fmt.Sprintf("<%x> ", []byte(n))
|
|
}
|
|
func (n valueNode) fstring(ind string) string {
|
|
return fmt.Sprintf("%x ", []byte(n))
|
|
}
|
|
|
|
// mustDecodeNode is a wrapper of decodeNode and panic if any error is encountered.
|
|
func mustDecodeNode(hash, buf []byte) node {
|
|
n, err := decodeNode(hash, buf)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("node %x: %v", hash, err))
|
|
}
|
|
return n
|
|
}
|
|
|
|
// mustDecodeNodeUnsafe is a wrapper of decodeNodeUnsafe and panic if any error is
|
|
// encountered.
|
|
func mustDecodeNodeUnsafe(hash, buf []byte) node {
|
|
n, err := decodeNodeUnsafe(hash, buf)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("node %x: %v", hash, err))
|
|
}
|
|
return n
|
|
}
|
|
|
|
// decodeNode parses the RLP encoding of a trie node. It will deep-copy the passed
|
|
// byte slice for decoding, so it's safe to modify the byte slice afterwards. The-
|
|
// decode performance of this function is not optimal, but it is suitable for most
|
|
// scenarios with low performance requirements and hard to determine whether the
|
|
// byte slice be modified or not.
|
|
func decodeNode(hash, buf []byte) (node, error) {
|
|
return decodeNodeUnsafe(hash, common.CopyBytes(buf))
|
|
}
|
|
|
|
// decodeNodeUnsafe parses the RLP encoding of a trie node. The passed byte slice
|
|
// will be directly referenced by node without bytes deep copy, so the input MUST
|
|
// not be changed after.
|
|
func decodeNodeUnsafe(hash, buf []byte) (node, error) {
|
|
if len(buf) == 0 {
|
|
return nil, io.ErrUnexpectedEOF
|
|
}
|
|
elems, _, err := rlp.SplitList(buf)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("decode error: %v", err)
|
|
}
|
|
switch c, _ := rlp.CountValues(elems); c {
|
|
case 2:
|
|
n, err := decodeShort(hash, elems)
|
|
return n, wrapError(err, "short")
|
|
case 17:
|
|
n, err := decodeFull(hash, elems)
|
|
return n, wrapError(err, "full")
|
|
default:
|
|
return nil, fmt.Errorf("invalid number of list elements: %v", c)
|
|
}
|
|
}
|
|
|
|
func decodeShort(hash, elems []byte) (node, error) {
|
|
kbuf, rest, err := rlp.SplitString(elems)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
flag := nodeFlag{hash: hash}
|
|
key := compactToHex(kbuf)
|
|
if hasTerm(key) {
|
|
// value node
|
|
val, _, err := rlp.SplitString(rest)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("invalid value node: %v", err)
|
|
}
|
|
return &shortNode{key, valueNode(val), flag}, nil
|
|
}
|
|
r, _, err := decodeRef(rest)
|
|
if err != nil {
|
|
return nil, wrapError(err, "val")
|
|
}
|
|
return &shortNode{key, r, flag}, nil
|
|
}
|
|
|
|
func decodeFull(hash, elems []byte) (*fullNode, error) {
|
|
n := &fullNode{flags: nodeFlag{hash: hash}}
|
|
for i := 0; i < 16; i++ {
|
|
cld, rest, err := decodeRef(elems)
|
|
if err != nil {
|
|
return n, wrapError(err, fmt.Sprintf("[%d]", i))
|
|
}
|
|
n.Children[i], elems = cld, rest
|
|
}
|
|
val, _, err := rlp.SplitString(elems)
|
|
if err != nil {
|
|
return n, err
|
|
}
|
|
if len(val) > 0 {
|
|
n.Children[16] = valueNode(val)
|
|
}
|
|
return n, nil
|
|
}
|
|
|
|
const hashLen = len(common.Hash{})
|
|
|
|
func decodeRef(buf []byte) (node, []byte, error) {
|
|
kind, val, rest, err := rlp.Split(buf)
|
|
if err != nil {
|
|
return nil, buf, err
|
|
}
|
|
switch {
|
|
case kind == rlp.List:
|
|
// 'embedded' node reference. The encoding must be smaller
|
|
// than a hash in order to be valid.
|
|
if size := len(buf) - len(rest); size > hashLen {
|
|
err := fmt.Errorf("oversized embedded node (size is %d bytes, want size < %d)", size, hashLen)
|
|
return nil, buf, err
|
|
}
|
|
// The buffer content has already been copied or is safe to use;
|
|
// no additional copy is required.
|
|
n, err := decodeNodeUnsafe(nil, buf)
|
|
return n, rest, err
|
|
case kind == rlp.String && len(val) == 0:
|
|
// empty node
|
|
return nil, rest, nil
|
|
case kind == rlp.String && len(val) == 32:
|
|
return hashNode(val), rest, nil
|
|
default:
|
|
return nil, nil, fmt.Errorf("invalid RLP string size %d (want 0 or 32)", len(val))
|
|
}
|
|
}
|
|
|
|
// wraps a decoding error with information about the path to the
|
|
// invalid child node (for debugging encoding issues).
|
|
type decodeError struct {
|
|
what error
|
|
stack []string
|
|
}
|
|
|
|
func wrapError(err error, ctx string) error {
|
|
if err == nil {
|
|
return nil
|
|
}
|
|
if decErr, ok := err.(*decodeError); ok {
|
|
decErr.stack = append(decErr.stack, ctx)
|
|
return decErr
|
|
}
|
|
return &decodeError{err, []string{ctx}}
|
|
}
|
|
|
|
func (err *decodeError) Error() string {
|
|
return fmt.Sprintf("%v (decode path: %s)", err.what, strings.Join(err.stack, "<-"))
|
|
}
|