From d087178f8c89b4c8532afc36cab48c22b718ff74 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet <3272758+gballet@users.noreply.github.com> Date: Tue, 20 Jan 2026 15:11:43 +0100 Subject: [PATCH] trie: introduce expired nodes (#556) --- trie/archive/archive.go | 89 ++++++ trie/bintrie/expired_node.go | 176 +++++++++++ trie/bintrie/expired_node_test.go | 277 +++++++++++++++++ trie/expired_node.go | 97 ++++++ trie/expired_node_test.go | 491 ++++++++++++++++++++++++++++++ trie/node.go | 10 + trie/trie.go | 100 +++++- 7 files changed, 1232 insertions(+), 8 deletions(-) create mode 100644 trie/archive/archive.go create mode 100644 trie/bintrie/expired_node.go create mode 100644 trie/bintrie/expired_node_test.go create mode 100644 trie/expired_node.go create mode 100644 trie/expired_node_test.go diff --git a/trie/archive/archive.go b/trie/archive/archive.go new file mode 100644 index 0000000000..857634b75e --- /dev/null +++ b/trie/archive/archive.go @@ -0,0 +1,89 @@ +// Copyright 2026 go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package archive + +import ( + "bytes" + "errors" + "fmt" + "io" + "os" + + "github.com/ethereum/go-ethereum/rlp" +) + +// ResolverFn is a callback to resolve expired nodes from an archive file. 
+// Given an offset and size, it returns the records decoded from that region
+// of the archive.
+type ResolverFn func(offset, size uint64) ([]*Record, error)
+
+// OffsetSize is the size of the file offset in bytes.
+const OffsetSize = 8
+
+var (
+	// EmptyArchiveRecord is returned when the archive contained a size-zero
+	// record. The name predates the ErrXxx convention and is kept so that
+	// existing callers keep compiling.
+	EmptyArchiveRecord = errors.New("empty record")
+
+	// ErrNoResolver is returned when an expired node is accessed without a
+	// resolver.
+	ErrNoResolver = errors.New("no archive resolver set for expired node")
+)
+
+// Record contains an archive file record. It is not the most optimal
+// structure, since any modification to it will need to be overwritten.
+type Record struct {
+	Path  []byte
+	Value []byte
+}
+
+// ArchivedNodeResolver takes the archive location held by an expiring
+// node (an offset and a size) and returns a list of records, which is
+// a list of serialized leaf nodes. The caller knows the context (MPT,
+// binary trie) and is responsible for decoding the nodes.
+//
+// The region [offset, offset+size) of the file "nodearchive" (relative
+// to the working directory) is read and interpreted as a concatenation
+// of RLP-encoded Records. A zero size yields a nil slice and no error.
+func ArchivedNodeResolver(offset, size uint64) ([]*Record, error) {
+	file, err := os.Open("nodearchive")
+	if err != nil {
+		return nil, fmt.Errorf("error opening archive file: %w", err)
+	}
+	defer file.Close()
+
+	// The offset and size come from a decoded trie node. Check them against
+	// the real file size before allocating, so that a corrupt node cannot
+	// trigger a huge allocation. The comparison is written to avoid
+	// overflowing offset+size.
+	info, err := file.Stat()
+	if err != nil {
+		return nil, fmt.Errorf("error getting archive file info: %w", err)
+	}
+	if fsize := uint64(info.Size()); offset > fsize || size > fsize-offset {
+		return nil, fmt.Errorf("archive region out of bounds: offset %d, size %d, file size %d", offset, size, fsize)
+	}
+
+	o, err := file.Seek(int64(offset), io.SeekStart)
+	if err != nil {
+		return nil, fmt.Errorf("error seeking into archive file: %w", err)
+	}
+	if uint64(o) != offset {
+		return nil, fmt.Errorf("invalid offset: want %d, got %d", offset, o)
+	}
+
+	data := make([]byte, size)
+	if _, err := io.ReadFull(file, data); err != nil {
+		return nil, fmt.Errorf("error reading data from archive: %w", err)
+	}
+
+	var records []*Record
+	for len(data) > 0 {
+		// Raw returns the next value including its RLP header. Kind only
+		// reports the payload size, so slicing by that size would cut
+		// the record short by the header length.
+		stream := rlp.NewStream(bytes.NewReader(data), uint64(len(data)))
+		raw, err := stream.Raw()
+		if err != nil {
+			return nil, fmt.Errorf("error getting rlp kind from archive data: %w", err)
+		}
+		record := new(Record)
+		if err := rlp.DecodeBytes(raw, record); err != nil {
+			return nil, fmt.Errorf("error decoding rlp record from archive data: %w", err)
+		}
+		data = data[len(raw):]
+		records = append(records, record)
+	}
+	return records, nil
+}
diff --git a/trie/bintrie/expired_node.go b/trie/bintrie/expired_node.go
new file mode 100644
index 0000000000..d3b90ee9ea
--- /dev/null
+++ b/trie/bintrie/expired_node.go
@@ -0,0 +1,176 @@
+// Copyright 2026 go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package bintrie
+
+import (
+	"fmt"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/trie/archive"
+)
+
+// expiredNode represents a node whose data has been archived.
+// It stores the file offset and size of the archived subtree data.
+type expiredNode struct {
+	Offset          uint64             // start of the archived data in the archive file
+	Size            uint64             // length of the archived data in bytes
+	depth           int                // depth of this node in the trie
+	archiveResolver archive.ResolverFn // loads the archived records; nil means unresolvable
+}
+
+// archiveRecordsToNode rebuilds the subtree described by a list of archive
+// records.
+//
+// No records yield archive.EmptyArchiveRecord. A single record is decoded
+// directly at the given depth. With several records, a skeleton of internal
+// nodes is created below a fresh root and each record's node is attached at
+// the position described by its Path (one element per level, 0 selects the
+// left child, anything else the right child).
+//
+// NOTE(review): this assumes record paths are relative to the expired node,
+// so an attached node is decoded at depth+len(Path) - confirm against the
+// archive writer.
+func archiveRecordsToNode(records []*archive.Record, depth int) (BinaryNode, error) {
+	if len(records) == 0 {
+		return nil, archive.EmptyArchiveRecord
+	}
+	if len(records) == 1 {
+		return DeserializeNode(records[0].Value, depth)
+	}
+
+	root := &InternalNode{depth: depth}
+	for _, record := range records {
+		if len(record.Path) == 0 {
+			// A pathless record has no position below the root, so it
+			// cannot be combined with sibling records.
+			return nil, fmt.Errorf("archive record without path in a set of %d records", len(records))
+		}
+		resolved, err := DeserializeNode(record.Value, depth+len(record.Path))
+		if err != nil {
+			return nil, err
+		}
+		// It's not needed to resurrect all nodes, nodes
+		// not along the path of what has been asked can
+		// be updated as expired. This is for v2.
+		cur := root
+		for i, b := range record.Path {
+			// Take the address of the child slot so that the update
+			// is written into the tree and not into a local copy.
+			slot := &cur.left
+			if b != 0 {
+				slot = &cur.right
+			}
+			if i == len(record.Path)-1 {
+				if *slot != nil {
+					return nil, fmt.Errorf("conflicting archive records at path %x", record.Path)
+				}
+				*slot = resolved
+				break
+			}
+			if *slot == nil {
+				*slot = &InternalNode{depth: cur.depth + 1}
+			}
+			next, ok := (*slot).(*InternalNode)
+			if !ok {
+				return nil, fmt.Errorf("archive record path %x descends through a %T", record.Path, *slot)
+			}
+			cur = next
+		}
+	}
+	return root, nil
+}
+
+// resolve loads the archived records through the archive resolver and
+// rebuilds the node they describe. It returns archive.ErrNoResolver if
+// no resolver has been set.
+func (n *expiredNode) resolve() (BinaryNode, error) {
+	if n.archiveResolver == nil {
+		return nil, archive.ErrNoResolver
+	}
+	records, err := n.archiveResolver(n.Offset, n.Size)
+	if err != nil {
+		return nil, fmt.Errorf("failed to resolve expired node: %w", err)
+	}
+	resolved, err := archiveRecordsToNode(records, n.depth)
+	if err != nil {
+		return nil, fmt.Errorf("failed to deserialize expired node: %w", err)
+	}
+	return resolved, nil
+}
+
+// Get resolves the archived subtree and looks the key up in it.
+func (n *expiredNode) Get(key []byte, resolver NodeResolverFn) ([]byte, error) {
+	resolved, err := n.resolve()
+	if err != nil {
+		return nil, err
+	}
+	return resolved.Get(key, resolver)
+}
+
+// Insert resolves the archived subtree and inserts the key/value pair into
+// it, returning the node produced by the resolved node's Insert.
+func (n *expiredNode) Insert(key, value []byte, resolver NodeResolverFn, depth int) (BinaryNode, error) {
+	resolved, err := n.resolve()
+	if err != nil {
+		return nil, err
+	}
+	return resolved.Insert(key, value, resolver, depth)
+}
+
+// Copy returns a new expiredNode with the same location, depth and resolver.
+func (n *expiredNode) Copy() BinaryNode {
+	return &expiredNode{
+		Offset:          n.Offset,
+		Size:            n.Size,
+		depth:           n.depth,
+		archiveResolver: n.archiveResolver,
+	}
+}
+
+// Hash returns the zero hash; the archived data is not consulted.
+func (n *expiredNode) Hash() common.Hash {
+	return common.Hash{}
+}
+
+// GetValuesAtStem resolves the archived subtree and returns the values
+// stored at the given stem.
+func (n *expiredNode) GetValuesAtStem(stem []byte, resolver NodeResolverFn) ([][]byte, error) {
+	resolved, err := n.resolve()
+	if err != nil {
+		return nil, err
+	}
+	return resolved.GetValuesAtStem(stem, resolver)
+}
+
+// InsertValuesAtStem resolves the archived subtree and inserts the values
+// at the given stem, returning the node produced by the resolved node.
+func (n *expiredNode) InsertValuesAtStem(stem []byte, values [][]byte, resolver NodeResolverFn, depth int) (BinaryNode, error) {
+	resolved, err := n.resolve()
+	if err != nil {
+		return nil, err
+	}
+	return resolved.InsertValuesAtStem(stem, values, resolver, depth)
+}
+
+// CollectNodes is a no-op: the flush function is never called for an
+// expired node.
+func (n *expiredNode) CollectNodes(path []byte, flushfn NodeFlushFn) error {
+	return nil
+}
+
+// toDot renders the node, and the edge from its parent if one is given,
+// in Graphviz dot syntax.
+func (n *expiredNode) toDot(parent, path string) string {
+	me := fmt.Sprintf("expired%s", path)
+	ret := fmt.Sprintf("%s [label=\"EXPIRED: offset=%d\"]\n", me, n.Offset)
+	if len(parent) > 0 {
+		ret = fmt.Sprintf("%s %s -> %s\n", ret, parent, me)
+	}
+	return ret
+}
+
+// GetHeight always reports 0 for an expired node.
+func (n *expiredNode) GetHeight() int {
+	return 0
+}
+
+// SetArchiveResolver sets the resolver function for this expired node.
+func (n *expiredNode) SetArchiveResolver(resolver archive.ResolverFn) {
+	n.archiveResolver = resolver
+}
+
+// Depth returns the depth of this node in the trie.
+func (n *expiredNode) Depth() int {
+	return n.depth
+}
diff --git a/trie/bintrie/expired_node_test.go b/trie/bintrie/expired_node_test.go
new file mode 100644
index 0000000000..ca9a7548cb
--- /dev/null
+++ b/trie/bintrie/expired_node_test.go
@@ -0,0 +1,277 @@
+// Copyright 2026 go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package bintrie
+
+import (
+	"bytes"
+	"errors"
+	"testing"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/trie/archive"
+)
+
+// TestExpiredNodeSerializeDeserialize round-trips expired nodes through
+// SerializeNode/DeserializeNode and checks offset, size and depth.
+func TestExpiredNodeSerializeDeserialize(t *testing.T) {
+	testCases := []struct {
+		offset uint64
+		size   uint64
+	}{
+		{0, 0},
+		{1, 100},
+		{255, 1024},
+		{256, 4096},
+		{1 << 16, 1 << 20},
+		{1 << 32, 1 << 32},
+		{1<<64 - 1, 1<<64 - 1},
+	}
+
+	for _, tc := range testCases {
+		original := &expiredNode{Offset: tc.offset, Size: tc.size, depth: 5}
+		serialized := SerializeNode(original)
+
+		deserialized, err := DeserializeNode(serialized, 5)
+		if err != nil {
+			t.Fatalf("failed to deserialize expired node with offset %d, size %d: %v", tc.offset, tc.size, err)
+		}
+
+		expNode, ok := deserialized.(*expiredNode)
+		if !ok {
+			t.Fatalf("deserialized node is not an expired node, got %T", deserialized)
+		}
+
+		if expNode.Offset != original.Offset {
+			t.Errorf("offset mismatch: got %d, want %d", expNode.Offset, original.Offset)
+		}
+
+		if expNode.Size != original.Size {
+			t.Errorf("size mismatch: got %d, want %d", expNode.Size, original.Size)
+		}
+
+		if expNode.depth != original.depth {
+			t.Errorf("depth mismatch: got %d, want %d", expNode.depth, original.depth)
+		}
+	}
+}
+
+// TestExpiredNodeSerializedFormat pins the byte layout: the type byte,
+// then the offset and the size as big-endian 8-byte integers.
+func TestExpiredNodeSerializedFormat(t *testing.T) {
+	node := &expiredNode{Offset: 0x0102030405060708, Size: 0x1112131415161718, depth: 0}
+	serialized := SerializeNode(node)
+
+	expected := []byte{
+		nodeTypeExpired,
+		0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+		0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+	}
+	if !bytes.Equal(serialized, expected) {
+		t.Errorf("serialized format mismatch: got %x, want %x", serialized, expected)
+	}
+}
+
+// TestExpiredNodeSerializedSize checks the serialized length is the type
+// byte(s) plus two offset-sized fields.
+func TestExpiredNodeSerializedSize(t *testing.T) {
+	node := &expiredNode{Offset: 12345, Size: 6789, depth: 0}
+	serialized := SerializeNode(node)
+
+	if len(serialized) != NodeTypeBytes+2*archive.OffsetSize {
+		t.Errorf("serialized size mismatch: got %d, want %d", len(serialized), NodeTypeBytes+2*archive.OffsetSize)
+	}
+}
+
+// TestExpiredNodeInvalidLength checks that buffers which are too short or
+// too long for an expired node are rejected by DeserializeNode.
+func TestExpiredNodeInvalidLength(t *testing.T) {
+	invalidCases := [][]byte{
+		{nodeTypeExpired},
+		{nodeTypeExpired, 0x01},
+		{nodeTypeExpired, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08},
+		{nodeTypeExpired, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f},
+		{nodeTypeExpired, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11},
+	}
+
+	for _, buf := range invalidCases {
+		_, err := DeserializeNode(buf, 0)
+		if err == nil {
+			t.Errorf("expected error for buffer length %d, got nil", len(buf))
+		}
+	}
+}
+
+// TestExpiredNodeHash checks that an expired node hashes to the zero hash.
+func TestExpiredNodeHash(t *testing.T) {
+	node := &expiredNode{Offset: 100, depth: 5}
+	hash := node.Hash()
+
+	if hash != (common.Hash{}) {
+		t.Errorf("expected zero hash, got %x", hash)
+	}
+}
+
+// TestExpiredNodeGetHeight checks that an expired node reports height 0.
+func TestExpiredNodeGetHeight(t *testing.T) {
+	node := &expiredNode{Offset: 100, depth: 5}
+	height := node.GetHeight()
+
+	if height != 0 {
+		t.Errorf("expected height 0, got %d", height)
+	}
+}
+
+// TestExpiredNodeCollectNodes checks that CollectNodes succeeds without
+// invoking the flush callback.
+func TestExpiredNodeCollectNodes(t *testing.T) {
+	node := &expiredNode{Offset: 100, depth: 5}
+	called := false
+	err := node.CollectNodes(nil, func(path []byte, n BinaryNode) {
+		called = true
+	})
+
+	if err != nil {
+		t.Errorf("unexpected error: %v", err)
+	}
+	if called {
+		t.Error("flush function should not be called for expired nodes")
+	}
+}
+
+// TestExpiredNodeToDot only checks that toDot produces some output.
+func TestExpiredNodeToDot(t *testing.T) {
+	node := &expiredNode{Offset: 12345, depth: 5}
+	dot := node.toDot("parent", "path")
+
+	if dot == "" {
+		t.Error("toDot should return non-empty string")
+	}
+}
+
+// TestExpiredNodeCopy checks that Copy carries over offset, size, depth
+// and the archive resolver.
+func TestExpiredNodeCopy(t *testing.T) {
+	resolver := func(offset, size uint64) ([]*archive.Record, error) {
+		return nil, nil
+	}
+
+	original := &expiredNode{
+		Offset:          12345,
+		Size:            6789,
+		depth:           5,
+		archiveResolver: resolver,
+	}
+
+	copied := original.Copy()
+	copiedExp, ok := copied.(*expiredNode)
+	if !ok {
+		t.Fatalf("copied node is not an expired node, got %T", copied)
+	}
+
+	if copiedExp.Offset != original.Offset {
+		t.Errorf("offset mismatch: got %d, want %d", copiedExp.Offset, original.Offset)
+	}
+
+	if copiedExp.Size != original.Size {
+		t.Errorf("size mismatch: got %d, want %d", copiedExp.Size, original.Size)
+	}
+
+	if copiedExp.depth != original.depth {
+		t.Errorf("depth mismatch: got %d, want %d", copiedExp.depth, original.depth)
+	}
+
+	if copiedExp.archiveResolver == nil {
+		t.Error("archive resolver was not copied")
+	}
+}
+
+// TestExpiredNodeNoResolver checks that every accessor returns
+// archive.ErrNoResolver when no resolver has been set.
+func TestExpiredNodeNoResolver(t *testing.T) {
+	node := &expiredNode{Offset: 100, depth: 5}
+
+	_, err := node.Get(make([]byte, 32), nil)
+	if !errors.Is(err, archive.ErrNoResolver) {
+		t.Errorf("Get: expected archive.ErrNoResolver, got %v", err)
+	}
+
+	_, err = node.Insert(make([]byte, 32), make([]byte, 32), nil, 0)
+	if !errors.Is(err, archive.ErrNoResolver) {
+		t.Errorf("Insert: expected archive.ErrNoResolver, got %v", err)
+	}
+
+	_, err = node.GetValuesAtStem(make([]byte, StemSize), nil)
+	if !errors.Is(err, archive.ErrNoResolver) {
+		t.Errorf("GetValuesAtStem: expected archive.ErrNoResolver, got %v", err)
+	}
+
+	_, err = node.InsertValuesAtStem(make([]byte, StemSize), make([][]byte, StemNodeWidth), nil, 0)
+	if !errors.Is(err, archive.ErrNoResolver) {
+		t.Errorf("InsertValuesAtStem: expected archive.ErrNoResolver, got %v", err)
+	}
+}
+
+// TestExpiredNodeWithResolver serves a single serialized stem node from a
+// stub resolver and reads a value back through the expired node.
+func TestExpiredNodeWithResolver(t *testing.T) {
+	var key [32]byte
+	copy(key[:StemSize], make([]byte, StemSize))
+	key[StemSize] = 0
+
+	var values [StemNodeWidth][]byte
+	values[0] = make([]byte, HashSize)
+	copy(values[0], []byte("testvalue"))
+
+	stemNode := &StemNode{
+		Stem:   key[:StemSize],
+		Values: values[:],
+		depth:  5,
+	}
+	serializedStem := SerializeNode(stemNode)
+
+	// The stub only knows offset 100; the size argument is ignored.
+	resolver := func(offset, size uint64) ([]*archive.Record, error) {
+		if offset == 100 {
+			return []*archive.Record{{Value: serializedStem}}, nil
+		}
+		return nil, errors.New("unknown offset")
+	}
+
+	node := &expiredNode{
+		Offset:          100,
+		Size:            uint64(len(serializedStem)),
+		depth:           5,
+		archiveResolver: resolver,
+	}
+
+	vals, err := node.GetValuesAtStem(key[:StemSize], nil)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	if vals == nil {
+		t.Fatal("expected non-nil values")
+	}
+
+	// The stored value is padded to HashSize, hence the prefix check.
+	if !bytes.HasPrefix(vals[0], []byte("testvalue")) {
+		t.Errorf("value mismatch: got %q", vals[0])
+	}
+}
+
+// TestExpiredNodeDepth checks the Depth accessor.
+func TestExpiredNodeDepth(t *testing.T) {
+	node := &expiredNode{Offset: 100, depth: 42}
+	if node.Depth() != 42 {
+		t.Errorf("expected depth 42, got %d", node.Depth())
+	}
+}
+
+// TestExpiredNodeSetArchiveResolver checks that SetArchiveResolver installs
+// the resolver on a node that starts without one.
+func TestExpiredNodeSetArchiveResolver(t *testing.T) {
+	node := &expiredNode{Offset: 100, depth: 5}
+
+	if node.archiveResolver != nil {
+		t.Error("expected nil archive resolver initially")
+	}
+
+	resolver := func(offset, size uint64) ([]*archive.Record, error) {
+		return nil, nil
+	}
+	node.SetArchiveResolver(resolver)
+
+	if node.archiveResolver == nil {
+		t.Error("expected non-nil archive resolver after setting")
+	}
+}
diff --git a/trie/expired_node.go b/trie/expired_node.go
new file mode 100644
index 0000000000..18957ccfcc
--- /dev/null
+++ b/trie/expired_node.go
@@ -0,0 +1,97 @@
+// Copyright 2026 go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package trie
+
+import (
+	"encoding/binary"
+	"fmt"
+
+	"github.com/ethereum/go-ethereum/rlp"
+	"github.com/ethereum/go-ethereum/trie/archive"
+)
+
+// expiredNodeMarker is a special marker byte to identify expired nodes.
+// Using 0x00 as a marker since valid MPT nodes are always RLP lists (starting with 0xc0+).
+const expiredNodeMarker = 0x00
+
+// expiredNode represents a node whose data has been archived.
+// It stores the file offset and size of the archived data.
+type expiredNode struct {
+	offset          uint64             // start of the archived data in the archive file
+	size            uint64             // length of the archived data in bytes
+	archiveResolver archive.ResolverFn // loads the archived records; may be nil
+}
+
+// cache reports no cached hash and always marks the node as dirty.
+func (n *expiredNode) cache() (hashNode, bool) {
+	return nil, true
+}
+
+// encode writes the marker byte followed by the offset and the size as
+// big-endian 8-byte integers. The output is deliberately not RLP, so that
+// the leading marker distinguishes it from regular nodes.
+func (n *expiredNode) encode(w rlp.EncoderBuffer) {
+	var buf [1 + 2*archive.OffsetSize]byte
+	buf[0] = expiredNodeMarker
+	binary.BigEndian.PutUint64(buf[1:], n.offset)
+	binary.BigEndian.PutUint64(buf[1+archive.OffsetSize:], n.size)
+	w.Write(buf[:])
+}
+
+// fstring returns a human-readable description of the node.
+//
+// NOTE(review): the format string in the reviewed text had no verbs for its
+// two arguments (its angle-bracketed content appears to have been lost), so
+// it is reconstructed here. Confirm the exact wording against the upstream
+// source and TestExpiredNodeFstring.
+func (n *expiredNode) fstring(ind string) string {
+	return fmt.Sprintf("<expired: offset=%d, size=%d> ", n.offset, n.size)
+}
+
+// Offset returns the archive file offset for this expired node.
+func (n *expiredNode) Offset() uint64 {
+	return n.offset
+}
+
+// SetArchiveResolver sets the resolver function for this expired node.
+func (n *expiredNode) SetArchiveResolver(resolver archive.ResolverFn) {
+	n.archiveResolver = resolver
+}
+
+// archiveRecordsToNode rebuilds the subtree described by a list of archive
+// records.
+//
+// No records yield archive.EmptyArchiveRecord. A single record is decoded
+// directly and its Path is ignored. With several records, a skeleton of
+// full nodes is created below a fresh root and each record's node is
+// attached at the position described by its Path, one branch index per
+// level.
+//
+// NOTE(review): paths are presumably nibbles relative to the expired node -
+// confirm against the archive writer.
+func archiveRecordsToNode(records []*archive.Record) (node, error) {
+	if len(records) == 0 {
+		return nil, archive.EmptyArchiveRecord
+	}
+	if len(records) == 1 {
+		return decodeNodeUnsafe(nil, records[0].Value)
+	}
+
+	root := &fullNode{}
+	for _, record := range records {
+		if len(record.Path) == 0 {
+			// A pathless record has no position below the root, so it
+			// cannot be combined with sibling records.
+			return nil, fmt.Errorf("archive record without path in a set of %d records", len(records))
+		}
+		resolved, err := decodeNodeUnsafe(nil, record.Value)
+		if err != nil {
+			return nil, err
+		}
+		// It's not needed to resurrect all nodes, nodes
+		// not along the path of what has been asked can
+		// be updated as expired. This is for v2.
+		cur := root
+		for i, b := range record.Path {
+			// Path bytes come from the archive file; an out-of-range
+			// index must not panic on the Children array.
+			if int(b) >= len(cur.Children) {
+				return nil, fmt.Errorf("invalid branch index %d in archive record path %x", b, record.Path)
+			}
+			if i == len(record.Path)-1 {
+				if cur.Children[b] != nil {
+					return nil, fmt.Errorf("conflicting archive records at path %x", record.Path)
+				}
+				cur.Children[b] = resolved
+				break
+			}
+			if cur.Children[b] == nil {
+				cur.Children[b] = &fullNode{}
+			}
+			// Descend, so that the next path element indexes the
+			// child and not the root again.
+			next, ok := cur.Children[b].(*fullNode)
+			if !ok {
+				return nil, fmt.Errorf("archive record path %x descends through a %T", record.Path, cur.Children[b])
+			}
+			cur = next
+		}
+	}
+	return root, nil
+}
diff --git a/trie/expired_node_test.go b/trie/expired_node_test.go
new file mode 100644
index 0000000000..4b4267ba37
--- /dev/null
+++ b/trie/expired_node_test.go
@@ -0,0 +1,491 @@
+// Copyright 2026 go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package trie
+
+import (
+	"bytes"
+	"errors"
+	"testing"
+
+	"github.com/ethereum/go-ethereum/rlp"
+	"github.com/ethereum/go-ethereum/trie/archive"
+)
+
+// TestExpiredNodeEncodeDecode round-trips expired nodes through encode and
+// decodeNodeUnsafe and checks offset and size.
+func TestExpiredNodeEncodeDecode(t *testing.T) {
+	testCases := []struct {
+		offset uint64
+		size   uint64
+	}{
+		{0, 0},
+		{1, 100},
+		{255, 1024},
+		{256, 4096},
+		{1 << 16, 1 << 20},
+		{1 << 32, 1 << 32},
+		{1<<64 - 1, 1<<64 - 1},
+	}
+
+	for _, tc := range testCases {
+		original := &expiredNode{offset: tc.offset, size: tc.size}
+
+		w := rlp.NewEncoderBuffer(nil)
+		original.encode(w)
+		encoded := w.ToBytes()
+		w.Flush()
+
+		decoded, err := decodeNodeUnsafe(nil, encoded)
+		if err != nil {
+			t.Fatalf("failed to decode expired node with offset %d, size %d: %v", tc.offset, tc.size, err)
+		}
+
+		expNode, ok := decoded.(*expiredNode)
+		if !ok {
+			t.Fatalf("decoded node is not an expired node, got %T", decoded)
+		}
+
+		if expNode.offset != original.offset {
+			t.Errorf("offset mismatch: got %d, want %d", expNode.offset, original.offset)
+		}
+		if expNode.size != original.size {
+			t.Errorf("size mismatch: got %d, want %d", expNode.size, original.size)
+		}
+	}
+}
+
+// TestExpiredNodeEncodedFormat pins the byte layout: the 0x00 marker, then
+// the offset and the size as big-endian 8-byte integers.
+func TestExpiredNodeEncodedFormat(t *testing.T) {
+	node := &expiredNode{offset: 0x0102030405060708, size: 0x1112131415161718}
+
+	w := rlp.NewEncoderBuffer(nil)
+	node.encode(w)
+	encoded := w.ToBytes()
+	w.Flush()
+
+	expected := []byte{
+		0x00,
+		0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+		0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+	}
+	if !bytes.Equal(encoded, expected) {
+		t.Errorf("encoded format mismatch: got %x, want %x", encoded, expected)
+	}
+}
+
+// TestExpiredNodeFstring checks the debug string of an expired node.
+// NOTE(review): the expected literal below looks truncated (a single space);
+// confirm it against the format string used by expiredNode.fstring.
+func TestExpiredNodeFstring(t *testing.T) {
+	node := &expiredNode{offset: 12345, size: 6789}
+	s := node.fstring("")
+	if s != " " {
+		t.Errorf("fstring mismatch: got %q", s)
+	}
+}
+
+// TestExpiredNodeCache checks that cache reports a nil hash and dirty=true.
+func TestExpiredNodeCache(t *testing.T) {
+	node := &expiredNode{offset: 100}
+	hash, dirty := node.cache()
+	if hash != nil {
+		t.Error("expected nil hash from expired node cache")
+	}
+	if !dirty {
+		t.Error("expected dirty=true from expired node cache")
+	}
+}
+
+// TestExpiredNodeInvalidLength checks that marker-prefixed buffers of the
+// wrong length are rejected by decodeNodeUnsafe.
+func TestExpiredNodeInvalidLength(t *testing.T) {
+	invalidCases := [][]byte{
+		{0x00},
+		{0x00, 0x01},
+		{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08},
+		{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f},
+		{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11},
+	}
+
+	for _, buf := range invalidCases {
+		_, err := decodeNodeUnsafe(nil, buf)
+		if err == nil {
+			t.Errorf("expected error for buffer length %d, got nil", len(buf))
+		}
+	}
+}
+
+// TestExpiredNodeNoResolver checks that reading through an expired root
+// without any resolver yields archive.ErrNoResolver.
+func TestExpiredNodeNoResolver(t *testing.T) {
+	tr := NewEmpty(nil)
+	tr.root = &expiredNode{offset: 100}
+
+	_, err := tr.Get([]byte("key"))
+	if !errors.Is(err, archive.ErrNoResolver) {
+		t.Errorf("expected archive.ErrNoResolver, got %v", err)
+	}
+}
+
+// TestExpiredNodeWithResolver serves a single encoded leaf from a stub
+// resolver and reads its value through an expired root.
+func TestExpiredNodeWithResolver(t *testing.T) {
+	tr := NewEmpty(nil)
+
+	leafNode := &shortNode{
+		Key: hexToCompact(keybytesToHex([]byte{0x12})),
+		Val: valueNode([]byte("testvalue")),
+	}
+	encodedLeaf := nodeToBytes(leafNode)
+
+	// The stub only knows offset 100; the size argument is ignored.
+	resolver := func(offset, size uint64) ([]*archive.Record, error) {
+		if offset == 100 {
+			return []*archive.Record{{Value: encodedLeaf}}, nil
+		}
+		return nil, errors.New("unknown offset")
+	}
+
+	tr.SetArchiveResolver(resolver)
+	tr.root = &expiredNode{offset: 100, size: uint64(len(encodedLeaf)), archiveResolver: resolver}
+
+	val, err := tr.Get([]byte{0x12})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if string(val) != "testvalue" {
+		t.Errorf("value mismatch: got %q, want %q", val, "testvalue")
+	}
+}
+
+// TestExpiredNodeCopy checks that copyNode carries over offset, size and
+// the archive resolver.
+func TestExpiredNodeCopy(t *testing.T) {
+	resolver := func(offset, size uint64) ([]*archive.Record, error) {
+		return nil, nil
+	}
+
+	original := &expiredNode{
+		offset:          12345,
+		size:            6789,
+		archiveResolver: resolver,
+	}
+
+	copied := copyNode(original)
+	copiedExp, ok := copied.(*expiredNode)
+	if !ok {
+		t.Fatalf("copied node is not an expired node, got %T", copied)
+	}
+
+	if copiedExp.offset != original.offset {
+		t.Errorf("offset mismatch: got %d, want %d", copiedExp.offset, original.offset)
+	}
+
+	if copiedExp.size != original.size {
+		t.Errorf("size mismatch: got %d, want %d", copiedExp.size, original.size)
+	}
+
+	if copiedExp.archiveResolver == nil {
+		t.Error("archive resolver was not copied")
+	}
+}
+
+// TestArchiveRecordsToNodeEmpty checks that both an empty and a nil record
+// list yield archive.EmptyArchiveRecord.
+func TestArchiveRecordsToNodeEmpty(t *testing.T) {
+	_, err := archiveRecordsToNode([]*archive.Record{})
+	if !errors.Is(err, archive.EmptyArchiveRecord) {
+		t.Errorf("expected EmptyArchiveRecord error, got %v", err)
+	}
+
+	_, err = archiveRecordsToNode(nil)
+	if !errors.Is(err, archive.EmptyArchiveRecord) {
+		t.Errorf("expected EmptyArchiveRecord error for nil slice, got %v", err)
+	}
+}
+
+// TestArchiveRecordsToNodeMultiple checks that two records with one-element
+// paths become children of a full node at the corresponding indices.
+func TestArchiveRecordsToNodeMultiple(t *testing.T) {
+	leaf1 := &shortNode{
+		Key: hexToCompact(keybytesToHex([]byte{0x10})),
+		Val: valueNode([]byte("value1")),
+	}
+	leaf2 := &shortNode{
+		Key: hexToCompact(keybytesToHex([]byte{0x20})),
+		Val: valueNode([]byte("value2")),
+	}
+
+	records := []*archive.Record{
+		{Path: []byte{0x01}, Value: nodeToBytes(leaf1)},
+		{Path: []byte{0x02}, Value: nodeToBytes(leaf2)},
+	}
+
+	node, err := archiveRecordsToNode(records)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	fn, ok := node.(*fullNode)
+	if !ok {
+		t.Fatalf("expected fullNode, got %T", node)
+	}
+
+	if fn.Children[0x01] == nil {
+		t.Error("expected child at index 0x01")
+	}
+	if fn.Children[0x02] == nil {
+		t.Error("expected child at index 0x02")
+	}
+}
+
+// TestExpiredNodeGetMultipleRecords reads two different keys through an
+// expired root that resolves to two records. A fresh trie is used for the
+// second lookup.
+func TestExpiredNodeGetMultipleRecords(t *testing.T) {
+	leaf1 := &shortNode{
+		Key: hexToCompact([]byte{0x02, 0x03, 0x04, 16}),
+		Val: valueNode([]byte("value1")),
+	}
+	leaf2 := &shortNode{
+		Key: hexToCompact([]byte{0x05, 0x06, 0x07, 16}),
+		Val: valueNode([]byte("value2")),
+	}
+
+	resolver := func(offset, size uint64) ([]*archive.Record, error) {
+		return []*archive.Record{
+			{Path: []byte{0x01}, Value: nodeToBytes(leaf1)},
+			{Path: []byte{0x04}, Value: nodeToBytes(leaf2)},
+		}, nil
+	}
+
+	tr := NewEmpty(nil)
+	tr.SetArchiveResolver(resolver)
+	tr.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver}
+
+	val, err := tr.Get([]byte{0x12, 0x34})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if string(val) != "value1" {
+		t.Errorf("value mismatch: got %q, want %q", val, "value1")
+	}
+
+	tr2 := NewEmpty(nil)
+	tr2.SetArchiveResolver(resolver)
+	tr2.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver}
+
+	val2, err := tr2.Get([]byte{0x45, 0x67})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if string(val2) != "value2" {
+		t.Errorf("value mismatch: got %q, want %q", val2, "value2")
+	}
+}
+
+// TestExpiredNodeGetKeyNotFound checks that looking up a key absent from
+// the resolved subtree returns nil without an error.
+func TestExpiredNodeGetKeyNotFound(t *testing.T) {
+	leaf := &shortNode{
+		Key: hexToCompact(keybytesToHex([]byte{0x12})),
+		Val: valueNode([]byte("value1")),
+	}
+
+	resolver := func(offset, size uint64) ([]*archive.Record, error) {
+		return []*archive.Record{
+			{Path: []byte{0x01}, Value: nodeToBytes(leaf)},
+		}, nil
+	}
+
+	tr := NewEmpty(nil)
+	tr.SetArchiveResolver(resolver)
+	tr.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver}
+
+	val, err := tr.Get([]byte{0xff, 0xff})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if val != nil {
+		t.Errorf("expected nil value for non-existent key, got %q", val)
+	}
+}
+
+// TestExpiredNodeGetPathMismatch checks that a key sharing only its first
+// nibble with the resolved leaf returns nil without an error.
+func TestExpiredNodeGetPathMismatch(t *testing.T) {
+	leaf := &shortNode{
+		Key: hexToCompact(keybytesToHex([]byte{0x12})),
+		Val: valueNode([]byte("testvalue")),
+	}
+
+	resolver := func(offset, size uint64) ([]*archive.Record, error) {
+		return []*archive.Record{
+			{Path: []byte{0x01}, Value: nodeToBytes(leaf)},
+		}, nil
+	}
+
+	tr := NewEmpty(nil)
+	tr.SetArchiveResolver(resolver)
+	tr.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver}
+
+	val, err := tr.Get([]byte{0x19})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if val != nil {
+		t.Errorf("expected nil value when leaf key doesn't match, got %q", val)
+	}
+}
+
+// TestExpiredNodeInsert inserts a new key below an expired root and reads
+// it back.
+func TestExpiredNodeInsert(t *testing.T) {
+	leaf := &shortNode{
+		Key: hexToCompact(keybytesToHex([]byte{0x12})),
+		Val: valueNode([]byte("existing")),
+	}
+
+	resolver := func(offset, size uint64) ([]*archive.Record, error) {
+		return []*archive.Record{
+			{Path: []byte{}, Value: nodeToBytes(leaf)},
+		}, nil
+	}
+
+	tr := NewEmpty(nil)
+	tr.SetArchiveResolver(resolver)
+	tr.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver}
+
+	err := tr.Update([]byte{0x45}, []byte("newvalue"))
+	if err != nil {
+		t.Fatalf("unexpected error on insert: %v", err)
+	}
+
+	val, err := tr.Get([]byte{0x45})
+	if err != nil {
+		t.Fatalf("unexpected error on get: %v", err)
+	}
+	if string(val) != "newvalue" {
+		t.Errorf("value mismatch: got %q, want %q", val, "newvalue")
+	}
+}
+
+// TestExpiredNodeUpdate overwrites the value of a key that lives in the
+// resolved subtree and reads it back.
+func TestExpiredNodeUpdate(t *testing.T) {
+	leaf := &shortNode{
+		Key: hexToCompact(keybytesToHex([]byte{0x12})),
+		Val: valueNode([]byte("oldvalue")),
+	}
+
+	resolver := func(offset, size uint64) ([]*archive.Record, error) {
+		return []*archive.Record{
+			{Path: []byte{}, Value: nodeToBytes(leaf)},
+		}, nil
+	}
+
+	tr := NewEmpty(nil)
+	tr.SetArchiveResolver(resolver)
+	tr.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver}
+
+	err := tr.Update([]byte{0x12}, []byte("newvalue"))
+	if err != nil {
+		t.Fatalf("unexpected error on update: %v", err)
+	}
+
+	val, err := tr.Get([]byte{0x12})
+	if err != nil {
+		t.Fatalf("unexpected error on get: %v", err)
+	}
+	if string(val) != "newvalue" {
+		t.Errorf("value mismatch: got %q, want %q", val, "newvalue")
+	}
+}
+
+// TestExpiredNodeDelete deletes one of two keys below an expired root that
+// resolves to a branch, then checks the other key is still readable.
+func TestExpiredNodeDelete(t *testing.T) {
+	leaf1 := &shortNode{
+		Key: hexToCompact([]byte{0x02, 16}),
+		Val: valueNode([]byte("value1")),
+	}
+	leaf2 := &shortNode{
+		Key: hexToCompact([]byte{0x05, 16}),
+		Val: valueNode([]byte("value2")),
+	}
+
+	branch := &fullNode{}
+	branch.Children[0x01] = leaf1
+	branch.Children[0x04] = leaf2
+
+	resolver := func(offset, size uint64) ([]*archive.Record, error) {
+		return []*archive.Record{
+			{Path: []byte{}, Value: nodeToBytes(branch)},
+		}, nil
+	}
+
+	tr := NewEmpty(nil)
+	tr.SetArchiveResolver(resolver)
+	tr.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver}
+
+	err := tr.Delete([]byte{0x12})
+	if err != nil {
+		t.Fatalf("unexpected error on delete: %v", err)
+	}
+
+	val, err := tr.Get([]byte{0x12})
+	if err != nil {
+		t.Fatalf("unexpected error on get after delete: %v", err)
+	}
+	if val != nil {
+		t.Errorf("expected nil after delete, got %q", val)
+	}
+
+	val2, err := tr.Get([]byte{0x45})
+	if err != nil {
+		t.Fatalf("unexpected error getting other key: %v", err)
+	}
+	if string(val2) != "value2" {
+		t.Errorf("other value should still exist: got %q, want %q", val2, "value2")
+	}
+}
+
+// TestTrieCopyPreservesArchiveResolver checks that a copied trie can still
+// resolve an expired root through the resolver.
+func TestTrieCopyPreservesArchiveResolver(t *testing.T) {
+	leaf := &shortNode{
+		Key: hexToCompact(keybytesToHex([]byte{0x12})),
+		Val: valueNode([]byte("testvalue")),
+	}
+
+	resolverCalled := false
+	resolver := func(offset, size uint64) ([]*archive.Record, error) {
+		resolverCalled = true
+		return []*archive.Record{
+			{Path: []byte{}, Value: nodeToBytes(leaf)},
+		}, nil
+	}
+
+	tr := NewEmpty(nil)
+	tr.SetArchiveResolver(resolver)
+	tr.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver}
+
+	trCopy := tr.Copy()
+
+	val, err := trCopy.Get([]byte{0x12})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !resolverCalled {
+		t.Error("resolver was not called on copied trie")
+	}
+	if string(val) != "testvalue" {
+		t.Errorf("value mismatch: got %q, want %q", val, "testvalue")
+	}
+}
+
+// TestExpiredNodeGetNode checks that GetNode triggers the resolver when the
+// root is expired; a "non-consensus node" error is tolerated.
+func TestExpiredNodeGetNode(t *testing.T) {
+	leaf := &shortNode{
+		Key: hexToCompact(keybytesToHex([]byte{0x12})),
+		Val: valueNode([]byte("testvalue")),
+	}
+
+	resolverCalled := false
+	resolver := func(offset, size uint64) ([]*archive.Record, error) {
+		resolverCalled = true
+		return []*archive.Record{
+			{Path: []byte{}, Value: nodeToBytes(leaf)},
+		}, nil
+	}
+
+	tr := NewEmpty(nil)
+	tr.SetArchiveResolver(resolver)
+	tr.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver}
+
+	_, _, err := tr.GetNode(hexToCompact([]byte{0x01, 0x02}))
+	if !resolverCalled {
+		t.Error("resolver was not called during GetNode")
+	}
+	if err != nil && err.Error() != "non-consensus node" {
+		t.Fatalf("unexpected error: %v", err)
+	}
+}
diff --git a/trie/node.go b/trie/node.go
index b5094ff4b7..2556ba9f81 100644
--- a/trie/node.go
+++ b/trie/node.go
@@ -18,6 +18,7 @@ package trie
 
 import (
 	"bytes"
+	"encoding/binary"
 	"fmt"
 	"io"
 	"strings"
@@ -25,6 +26,7 @@ import (
 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/crypto"
 	"github.com/ethereum/go-ethereum/rlp"
+	"github.com/ethereum/go-ethereum/trie/archive"
 )
 
 var indices = []string{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f", "[17]"}
@@ -158,6 +160,14 @@ func decodeNodeUnsafe(hash, buf []byte) (node, error) {
 	if len(buf) == 0 {
 		return nil, io.ErrUnexpectedEOF
 	}
+	if buf[0] == expiredNodeMarker {
+		if len(buf) != 1+2*archive.OffsetSize {
+			return nil, fmt.Errorf("invalid expired node length: %d", len(buf))
+		}
+		offset := binary.BigEndian.Uint64(buf[1:])
+		size := binary.BigEndian.Uint64(buf[1+archive.OffsetSize:])
+		return &expiredNode{offset: offset, size: size, archiveResolver: archive.ArchivedNodeResolver}, nil
+	}
 	elems, _, err := rlp.SplitList(buf)
 	if err != nil {
 		return nil, fmt.Errorf("decode error: %v", err)
diff --git a/trie/trie.go b/trie/trie.go
index 1ef2c2f1a6..1c9c372b58 100644
--- a/trie/trie.go
+++ b/trie/trie.go
@@ -26,6 +26,7 @@ import (
 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/core/types"
 	"github.com/ethereum/go-ethereum/log"
+	"github.com/ethereum/go-ethereum/trie/archive"
 	"github.com/ethereum/go-ethereum/trie/trienode"
 	"github.com/ethereum/go-ethereum/triedb/database"
 	"golang.org/x/sync/errgroup"
@@ -57,6 +58,10 @@ type Trie struct {
 	// reader is the handler trie can retrieve nodes from.
 	reader *Reader
 
+	// archiveResolver is an optional callback to resolve expired nodes from
+	// an archive file.
+	archiveResolver archive.ResolverFn
+
 	// Various tracers for capturing the modifications to trie
 	opTracer       *opTracer
 	prevalueTracer *PrevalueTracer
@@ -70,17 +75,23 @@ func (t *Trie) newFlag() nodeFlag {
 // Copy returns a copy of Trie.
 func (t *Trie) Copy() *Trie {
 	return &Trie{
-		root:           copyNode(t.root),
-		owner:          t.owner,
-		committed:      t.committed,
-		unhashed:       t.unhashed,
-		uncommitted:    t.uncommitted,
-		reader:         t.reader,
-		opTracer:       t.opTracer.copy(),
-		prevalueTracer: t.prevalueTracer.Copy(),
+		root:            copyNode(t.root),
+		owner:           t.owner,
+		committed:       t.committed,
+		unhashed:        t.unhashed,
+		uncommitted:     t.uncommitted,
+		reader:          t.reader,
+		archiveResolver: t.archiveResolver,
+		opTracer:        t.opTracer.copy(),
+		prevalueTracer:  t.prevalueTracer.Copy(),
 	}
 }
 
+// SetArchiveResolver sets the archive resolver callback for expired nodes.
+func (t *Trie) SetArchiveResolver(resolver archive.ResolverFn) {
+	t.archiveResolver = resolver
+}
+
 // New creates the trie instance with provided trie id and the read-only
 // database. The state specified by trie id must be available, otherwise
 // an error will be returned.
The trie root specified by trie id can be
@@ -218,6 +229,38 @@ func (t *Trie) get(origNode node, key []byte, pos int) (value []byte, newnode no
 		}
 		value, newnode, _, err := t.get(child, key, pos)
 		return value, newnode, true, err
+	case *expiredNode:
+		// The subtree lives in the archive: fetch its records and rebuild the
+		// node before searching the archived leaves for the key.
+		if t.archiveResolver == nil {
+			return nil, n, false, archive.ErrNoResolver
+		}
+		records, err := t.archiveResolver(n.offset, n.size)
+		if err != nil {
+			return nil, n, false, fmt.Errorf("failed to resolve expired node: %w", err)
+		}
+		newnode, err := archiveRecordsToNode(records)
+		if err != nil {
+			// Never hand back a half-built node alongside a successful lookup.
+			return nil, n, false, fmt.Errorf("failed to rebuild expired node from archive: %w", err)
+		}
+		for _, record := range records {
+			// make sure that the path up to the node matches
+			if bytes.HasPrefix(key[pos:], record.Path) {
+				resolved, err := decodeNodeUnsafe(nil, record.Value)
+				if err != nil {
+					return nil, n, false, fmt.Errorf("failed to deserialize RLP node: %w", err)
+				}
+				if leaf, ok := resolved.(*shortNode); ok {
+					// make sure that the key to the leaf also matches
+					if bytes.Equal(key[pos+len(record.Path):], leaf.Key) {
+						return leaf.Val.(valueNode), newnode, true, nil
+					}
+				}
+			}
+		}
+		// No archived leaf matches the key: the value is absent.
+		return nil, newnode, false, nil
 	default:
 		panic(fmt.Sprintf("%T: invalid node: %v", origNode, origNode))
 	}
@@ -352,6 +395,23 @@ func (t *Trie) getNode(origNode node, path []byte, pos int) (item []byte, newnod
 		item, newnode, resolved, err := t.getNode(child, path, pos)
 		return item, newnode, resolved + 1, err
 
+	case *expiredNode:
+		// Rebuild the archived subtree, then continue the walk inside it. The
+		// archive lookup is counted as one extra resolved node.
+		if t.archiveResolver == nil {
+			return nil, n, 0, archive.ErrNoResolver
+		}
+		records, err := t.archiveResolver(n.offset, n.size)
+		if err != nil {
+			return nil, n, 0, fmt.Errorf("failed to resolve expired node: %w", err)
+		}
+		newnode, err := archiveRecordsToNode(records)
+		if err != nil {
+			return nil, n, 0, fmt.Errorf("failed to rebuild expired node from archive: %w", err)
+		}
+		item, newnode, resolvedCount, err := t.getNode(newnode, path, pos)
+		return item, newnode, resolvedCount + 1, err
+
 	default:
 		panic(fmt.Sprintf("%T: invalid node: %v", origNode, origNode))
 	}
@@ -475,6 +535,22 @@ func (t *Trie) insert(n node, prefix, key []byte, value node) (bool, node, error
 		}
 		return true, nn, nil
 
+	case *expiredNode:
+		// Bring the archived subtree back into memory before inserting into it.
+		if t.archiveResolver == nil {
+			return false, nil, archive.ErrNoResolver
+		}
+		records, err := t.archiveResolver(n.offset, n.size)
+		if err != nil {
+			return false, nil, fmt.Errorf("failed to resolve expired node: %w", err)
+		}
+		nn, err := archiveRecordsToNode(records)
+		if err != nil {
+			return false, nil, fmt.Errorf("failed to rebuild expired node from archive: %w", err)
+		}
+		dirty, nn, err := t.insert(nn, prefix, key, value)
+		return dirty && err == nil, nn, err
+
 	default:
 		panic(fmt.Sprintf("%T: invalid node: %v", n, n))
 	}
@@ -636,6 +712,24 @@ func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) {
 		}
 		return true, nn, nil
 
+	case *expiredNode:
+		// Bring the archived subtree back into memory before deleting from it.
+		if t.archiveResolver == nil {
+			return false, nil, archive.ErrNoResolver
+		}
+		records, err := t.archiveResolver(n.offset, n.size)
+		if err != nil {
+			return false, nil, fmt.Errorf("failed to resolve expired node: %w", err)
+		}
+		nn, err := archiveRecordsToNode(records)
+		if err != nil {
+			return false, nil, fmt.Errorf("failed to rebuild expired node from archive: %w", err)
+		}
+		// Return the node produced by the deletion, not the freshly rebuilt
+		// one, otherwise the removal would be lost.
+		dirty, nn, err := t.delete(nn, prefix, key)
+		return dirty && err == nil, nn, err
+
 	default:
 		panic(fmt.Sprintf("%T: invalid node: %v (%v)", n, n, key))
 	}
@@ -666,6 +760,12 @@ func copyNode(n node) node {
 		}
 	case hashNode:
 		return n
+	case *expiredNode:
+		return &expiredNode{
+			offset:          n.offset,
+			size:            n.size,
+			archiveResolver: n.archiveResolver,
+		}
 	default:
 		panic(fmt.Sprintf("%T: unknown node type", n))
 	}