From d087178f8c89b4c8532afc36cab48c22b718ff74 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet <3272758+gballet@users.noreply.github.com> Date: Tue, 20 Jan 2026 15:11:43 +0100 Subject: [PATCH 1/9] trie: introduce expired nodes (#556) --- trie/archive/archive.go | 89 ++++++ trie/bintrie/expired_node.go | 176 +++++++++++ trie/bintrie/expired_node_test.go | 277 +++++++++++++++++ trie/expired_node.go | 97 ++++++ trie/expired_node_test.go | 491 ++++++++++++++++++++++++++++++ trie/node.go | 10 + trie/trie.go | 100 +++++- 7 files changed, 1232 insertions(+), 8 deletions(-) create mode 100644 trie/archive/archive.go create mode 100644 trie/bintrie/expired_node.go create mode 100644 trie/bintrie/expired_node_test.go create mode 100644 trie/expired_node.go create mode 100644 trie/expired_node_test.go diff --git a/trie/archive/archive.go b/trie/archive/archive.go new file mode 100644 index 0000000000..857634b75e --- /dev/null +++ b/trie/archive/archive.go @@ -0,0 +1,89 @@ +// Copyright 2026 go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package archive + +import ( + "bytes" + "errors" + "fmt" + "io" + "os" + + "github.com/ethereum/go-ethereum/rlp" +) + +// ResolverFn is a callback to resolve expired nodes from an archive file. 
+// Given an offset and size, it returns the serialized node data from the archive.
+type ResolverFn func(offset, size uint64) ([]*Record, error)
+
+// OffsetSize is the size of the file offset in bytes.
+const OffsetSize = 8
+
+var (
+	EmptyArchiveRecord = errors.New("empty record")                             // The archive contained a size-zero record.
+	ErrNoResolver      = errors.New("no archive resolver set for expired node") // An expired node is accessed without a resolver.
+)
+
+// Record contains an archive file record. It is not the most optimal
+// structure, since any modification to it will need to be overwritten.
+type Record struct {
+	Path  []byte
+	Value []byte
+}
+
+// ArchivedNodeResolver reads size bytes starting at offset from the file
+// named "nodearchive" (opened relative to the working directory) and decodes
+// them as a sequence of back-to-back RLP-encoded records. The caller knows
+// the context (MPT, binary trie) and is responsible for decoding the node
+// stored in each record's Value.
+//
+// An error is returned if the file cannot be opened, if the requested region
+// cannot be read in full, or if the region is not a clean concatenation of
+// RLP-encoded records. A zero-sized region yields no records and no error.
+func ArchivedNodeResolver(offset, size uint64) ([]*Record, error) {
+	file, err := os.Open("nodearchive")
+	if err != nil {
+		return nil, fmt.Errorf("error opening archive file: %w", err)
+	}
+	defer file.Close()
+
+	o, err := file.Seek(int64(offset), io.SeekStart)
+	if err != nil {
+		return nil, fmt.Errorf("error seeking into archive file: %w", err)
+	}
+	if uint64(o) != offset {
+		return nil, fmt.Errorf("invalid offset: want %d, got %d", offset, o)
+	}
+
+	data := make([]byte, size)
+	if _, err := io.ReadFull(file, data); err != nil {
+		return nil, fmt.Errorf("error reading data from archive: %w", err)
+	}
+
+	// Decode the records off a single stream bounded to the region that was
+	// just read. Slicing the buffer by the size reported by Stream.Kind is
+	// not an option: that size covers the payload only, not the RLP header,
+	// so every record would be cut short and the next one misaligned.
+	var (
+		records []*Record
+		stream  = rlp.NewStream(bytes.NewReader(data), size)
+	)
+	for {
+		var record Record
+		err := stream.Decode(&record)
+		if errors.Is(err, io.EOF) {
+			// The whole region has been consumed.
+			break
+		}
+		if err != nil {
+			return nil, fmt.Errorf("error decoding rlp record from archive data: %w", err)
+		}
+		records = append(records, &record)
+	}
+	return records, nil
+}
diff --git a/trie/bintrie/expired_node.go b/trie/bintrie/expired_node.go
new file mode 100644
index 0000000000..d3b90ee9ea
--- /dev/null
+++ b/trie/bintrie/expired_node.go
@@ -0,0 +1,176 @@
+// Copyright 2026 go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package bintrie
+
+import (
+	"fmt"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/trie/archive"
+)
+
+// expiredNode represents a node whose data has been archived.
+// It stores the file offset and size of the archived subtree data.
+type expiredNode struct {
+	Offset          uint64             // position of the archived data, handed to the resolver
+	Size            uint64             // length of the archived data, handed to the resolver
+	depth           int                // depth used when deserializing the archived subtree
+	archiveResolver archive.ResolverFn // loads the archived records; nil means unresolvable
+}
+
+// archiveRecordsToNode rebuilds a subtree from a list of archive records.
+//
+// A single record is deserialized directly at the given depth and its path
+// is ignored. With several records, each record's Path is walked from a
+// fresh internal node at the given depth (a zero byte selects the left
+// child, anything else the right child) and the deserialized node is
+// attached where the path ends.
+//
+// It returns archive.EmptyArchiveRecord when no records are supplied, and an
+// error when a record cannot be deserialized, carries no path in a
+// multi-record list, or collides with a node placed by an earlier record.
+func archiveRecordsToNode(records []*archive.Record, depth int) (BinaryNode, error) {
+	if len(records) == 0 {
+		return nil, archive.EmptyArchiveRecord
+	}
+	if len(records) == 1 {
+		return DeserializeNode(records[0].Value, depth)
+	}
+
+	// NOTE(review): children that no record reaches are left nil, as in the
+	// previous implementation; confirm that InternalNode tolerates that.
+	root := &InternalNode{depth: depth}
+	for _, record := range records {
+		if len(record.Path) == 0 {
+			return nil, fmt.Errorf("archive record without a path in a multi-record subtree")
+		}
+		// It's not needed to resurrect all nodes, nodes
+		// not along the path of what has been asked can
+		// be updated as expired. This is for v2.
+		cur := root
+		last := len(record.Path) - 1
+		for i, b := range record.Path {
+			// Pick the child slot so that writes land in the tree itself.
+			slot := &cur.left
+			if b != 0 {
+				slot = &cur.right
+			}
+			// Depth is derived from the position in the path, so it starts
+			// over for every record.
+			childDepth := depth + i + 1
+			if i == last {
+				if *slot != nil {
+					return nil, fmt.Errorf("conflicting archive records at path %x", record.Path)
+				}
+				resolved, err := DeserializeNode(record.Value, childDepth)
+				if err != nil {
+					return nil, err
+				}
+				*slot = resolved
+				break
+			}
+			if *slot == nil {
+				*slot = &InternalNode{depth: childDepth}
+			}
+			next, ok := (*slot).(*InternalNode)
+			if !ok {
+				return nil, fmt.Errorf("conflicting archive records at path %x", record.Path[:i+1])
+			}
+			cur = next
+		}
+	}
+	return root, nil
+}
+
+// resolve loads the archived records through the node's resolver and
+// rebuilds the subtree they describe. It returns archive.ErrNoResolver when
+// no resolver has been set. The result is not cached: every call goes back
+// to the resolver.
+func (n *expiredNode) resolve() (BinaryNode, error) {
+	if n.archiveResolver == nil {
+		return nil, archive.ErrNoResolver
+	}
+	records, err := n.archiveResolver(n.Offset, n.Size)
+	if err != nil {
+		return nil, fmt.Errorf("failed to resolve expired node: %w", err)
+	}
+	resolved, err := archiveRecordsToNode(records, n.depth)
+	if err != nil {
+		return nil, fmt.Errorf("failed to deserialize expired node: %w", err)
+	}
+	return resolved, nil
+}
+
+// Get resolves the archived subtree and forwards the lookup to it.
+func (n *expiredNode) Get(key []byte, resolver NodeResolverFn) ([]byte, error) {
+	resolved, err := n.resolve()
+	if err != nil {
+		return nil, err
+	}
+	return resolved.Get(key, resolver)
+}
+
+// Insert resolves the archived subtree and forwards the insertion to it,
+// returning whatever node the resolved subtree returns.
+func (n *expiredNode) Insert(key, value []byte, resolver NodeResolverFn, depth int) (BinaryNode, error) {
+	resolved, err := n.resolve()
+	if err != nil {
+		return nil, err
+	}
+	return resolved.Insert(key, value, resolver, depth)
+}
+
+// Copy returns a new expiredNode with the same offset, size, depth and
+// resolver.
+func (n *expiredNode) Copy() BinaryNode {
+	return &expiredNode{
+		Offset:          n.Offset,
+		Size:            n.Size,
+		depth:           n.depth,
+		archiveResolver: n.archiveResolver,
+	}
+}
+
+// Hash always returns the zero hash; the archived data is not consulted.
+func (n *expiredNode) Hash() common.Hash {
+	return common.Hash{}
+}
+
+// GetValuesAtStem resolves the archived subtree and forwards the lookup to it.
+func (n *expiredNode) GetValuesAtStem(stem []byte, resolver NodeResolverFn) ([][]byte, error) {
+	resolved, err := n.resolve()
+	if err != nil {
+		return nil, err
+	}
+	return resolved.GetValuesAtStem(stem, resolver)
+}
+
+// InsertValuesAtStem resolves the archived subtree and forwards the
+// insertion to it, returning whatever node the resolved subtree returns.
+func (n *expiredNode) InsertValuesAtStem(stem []byte, values [][]byte, resolver NodeResolverFn, depth int) (BinaryNode, error) {
+	resolved, err := n.resolve()
+	if err != nil {
+		return nil, err
+	}
+	return resolved.InsertValuesAtStem(stem, values, resolver, depth)
+}
+
+// CollectNodes is a no-op: flushfn is never invoked for an expired node.
+func (n *expiredNode) CollectNodes(path []byte, flushfn NodeFlushFn) error {
+	return nil
+}
+
+// toDot renders the node as a dot vertex labelled with its offset, followed
+// by an edge from parent when parent is non-empty.
+func (n *expiredNode) toDot(parent, path string) string {
+	me := fmt.Sprintf("expired%s", path)
+	ret := fmt.Sprintf("%s [label=\"EXPIRED: offset=%d\"]\n", me, n.Offset)
+	if len(parent) > 0 {
+		ret = fmt.Sprintf("%s %s -> %s\n", ret, parent, me)
+	}
+	return ret
+}
+
+// GetHeight always reports a height of 0 for an expired node.
+func (n *expiredNode) GetHeight() int {
+	return 0
+}
+
+// SetArchiveResolver sets the resolver function for this expired node.
+func (n *expiredNode) SetArchiveResolver(resolver archive.ResolverFn) {
+	n.archiveResolver = resolver
+}
+
+// Depth returns the depth recorded for this node in the trie.
+func (n *expiredNode) Depth() int {
+	return n.depth
+}
diff --git a/trie/bintrie/expired_node_test.go b/trie/bintrie/expired_node_test.go
new file mode 100644
index 0000000000..ca9a7548cb
--- /dev/null
+++ b/trie/bintrie/expired_node_test.go
@@ -0,0 +1,277 @@
+// Copyright 2026 go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+ +package bintrie + +import ( + "bytes" + "errors" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/trie/archive" +) + +func TestExpiredNodeSerializeDeserialize(t *testing.T) { + testCases := []struct { + offset uint64 + size uint64 + }{ + {0, 0}, + {1, 100}, + {255, 1024}, + {256, 4096}, + {1 << 16, 1 << 20}, + {1 << 32, 1 << 32}, + {1<<64 - 1, 1<<64 - 1}, + } + + for _, tc := range testCases { + original := &expiredNode{Offset: tc.offset, Size: tc.size, depth: 5} + serialized := SerializeNode(original) + + deserialized, err := DeserializeNode(serialized, 5) + if err != nil { + t.Fatalf("failed to deserialize expired node with offset %d, size %d: %v", tc.offset, tc.size, err) + } + + expNode, ok := deserialized.(*expiredNode) + if !ok { + t.Fatalf("deserialized node is not an expired node, got %T", deserialized) + } + + if expNode.Offset != original.Offset { + t.Errorf("offset mismatch: got %d, want %d", expNode.Offset, original.Offset) + } + + if expNode.Size != original.Size { + t.Errorf("size mismatch: got %d, want %d", expNode.Size, original.Size) + } + + if expNode.depth != original.depth { + t.Errorf("depth mismatch: got %d, want %d", expNode.depth, original.depth) + } + } +} + +func TestExpiredNodeSerializedFormat(t *testing.T) { + node := &expiredNode{Offset: 0x0102030405060708, Size: 0x1112131415161718, depth: 0} + serialized := SerializeNode(node) + + expected := []byte{ + nodeTypeExpired, + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, + } + if !bytes.Equal(serialized, expected) { + t.Errorf("serialized format mismatch: got %x, want %x", serialized, expected) + } +} + +func TestExpiredNodeSerializedSize(t *testing.T) { + node := &expiredNode{Offset: 12345, Size: 6789, depth: 0} + serialized := SerializeNode(node) + + if len(serialized) != NodeTypeBytes+2*archive.OffsetSize { + t.Errorf("serialized size mismatch: got %d, want %d", len(serialized), 
NodeTypeBytes+2*archive.OffsetSize) + } +} + +func TestExpiredNodeInvalidLength(t *testing.T) { + invalidCases := [][]byte{ + {nodeTypeExpired}, + {nodeTypeExpired, 0x01}, + {nodeTypeExpired, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, + {nodeTypeExpired, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f}, + {nodeTypeExpired, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11}, + } + + for _, buf := range invalidCases { + _, err := DeserializeNode(buf, 0) + if err == nil { + t.Errorf("expected error for buffer length %d, got nil", len(buf)) + } + } +} + +func TestExpiredNodeHash(t *testing.T) { + node := &expiredNode{Offset: 100, depth: 5} + hash := node.Hash() + + if hash != (common.Hash{}) { + t.Errorf("expected zero hash, got %x", hash) + } +} + +func TestExpiredNodeGetHeight(t *testing.T) { + node := &expiredNode{Offset: 100, depth: 5} + height := node.GetHeight() + + if height != 0 { + t.Errorf("expected height 0, got %d", height) + } +} + +func TestExpiredNodeCollectNodes(t *testing.T) { + node := &expiredNode{Offset: 100, depth: 5} + called := false + err := node.CollectNodes(nil, func(path []byte, n BinaryNode) { + called = true + }) + + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if called { + t.Error("flush function should not be called for expired nodes") + } +} + +func TestExpiredNodeToDot(t *testing.T) { + node := &expiredNode{Offset: 12345, depth: 5} + dot := node.toDot("parent", "path") + + if dot == "" { + t.Error("toDot should return non-empty string") + } +} + +func TestExpiredNodeCopy(t *testing.T) { + resolver := func(offset, size uint64) ([]*archive.Record, error) { + return nil, nil + } + + original := &expiredNode{ + Offset: 12345, + Size: 6789, + depth: 5, + archiveResolver: resolver, + } + + copied := original.Copy() + copiedExp, ok := copied.(*expiredNode) + if !ok { + t.Fatalf("copied node is not an expired node, got %T", 
copied) + } + + if copiedExp.Offset != original.Offset { + t.Errorf("offset mismatch: got %d, want %d", copiedExp.Offset, original.Offset) + } + + if copiedExp.Size != original.Size { + t.Errorf("size mismatch: got %d, want %d", copiedExp.Size, original.Size) + } + + if copiedExp.depth != original.depth { + t.Errorf("depth mismatch: got %d, want %d", copiedExp.depth, original.depth) + } + + if copiedExp.archiveResolver == nil { + t.Error("archive resolver was not copied") + } +} + +func TestExpiredNodeNoResolver(t *testing.T) { + node := &expiredNode{Offset: 100, depth: 5} + + _, err := node.Get(make([]byte, 32), nil) + if !errors.Is(err, archive.ErrNoResolver) { + t.Errorf("Get: expected archive.ErrNoResolver, got %v", err) + } + + _, err = node.Insert(make([]byte, 32), make([]byte, 32), nil, 0) + if !errors.Is(err, archive.ErrNoResolver) { + t.Errorf("Insert: expected archive.ErrNoResolver, got %v", err) + } + + _, err = node.GetValuesAtStem(make([]byte, StemSize), nil) + if !errors.Is(err, archive.ErrNoResolver) { + t.Errorf("GetValuesAtStem: expected archive.ErrNoResolver, got %v", err) + } + + _, err = node.InsertValuesAtStem(make([]byte, StemSize), make([][]byte, StemNodeWidth), nil, 0) + if !errors.Is(err, archive.ErrNoResolver) { + t.Errorf("InsertValuesAtStem: expected archive.ErrNoResolver, got %v", err) + } +} + +func TestExpiredNodeWithResolver(t *testing.T) { + var key [32]byte + copy(key[:StemSize], make([]byte, StemSize)) + key[StemSize] = 0 + + var values [StemNodeWidth][]byte + values[0] = make([]byte, HashSize) + copy(values[0], []byte("testvalue")) + + stemNode := &StemNode{ + Stem: key[:StemSize], + Values: values[:], + depth: 5, + } + serializedStem := SerializeNode(stemNode) + + resolver := func(offset, size uint64) ([]*archive.Record, error) { + if offset == 100 { + return []*archive.Record{{Value: serializedStem}}, nil + } + return nil, errors.New("unknown offset") + } + + node := &expiredNode{ + Offset: 100, + Size: 
uint64(len(serializedStem)), + depth: 5, + archiveResolver: resolver, + } + + vals, err := node.GetValuesAtStem(key[:StemSize], nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if vals == nil { + t.Fatal("expected non-nil values") + } + + if !bytes.HasPrefix(vals[0], []byte("testvalue")) { + t.Errorf("value mismatch: got %q", vals[0]) + } +} + +func TestExpiredNodeDepth(t *testing.T) { + node := &expiredNode{Offset: 100, depth: 42} + if node.Depth() != 42 { + t.Errorf("expected depth 42, got %d", node.Depth()) + } +} + +func TestExpiredNodeSetArchiveResolver(t *testing.T) { + node := &expiredNode{Offset: 100, depth: 5} + + if node.archiveResolver != nil { + t.Error("expected nil archive resolver initially") + } + + resolver := func(offset, size uint64) ([]*archive.Record, error) { + return nil, nil + } + node.SetArchiveResolver(resolver) + + if node.archiveResolver == nil { + t.Error("expected non-nil archive resolver after setting") + } +} diff --git a/trie/expired_node.go b/trie/expired_node.go new file mode 100644 index 0000000000..18957ccfcc --- /dev/null +++ b/trie/expired_node.go @@ -0,0 +1,97 @@ +// Copyright 2026 go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . 
+
+package trie
+
+import (
+	"encoding/binary"
+	"fmt"
+
+	"github.com/ethereum/go-ethereum/rlp"
+	"github.com/ethereum/go-ethereum/trie/archive"
+)
+
+// expiredNodeMarker is a special marker byte to identify expired nodes.
+// Using 0x00 as a marker since valid MPT nodes are always RLP lists (starting with 0xc0+).
+const expiredNodeMarker = 0x00
+
+// expiredNode represents a node whose data has been archived.
+// It stores the file offset and size of the archived data.
+type expiredNode struct {
+	offset          uint64             // position of the archived data, handed to the resolver
+	size            uint64             // length of the archived data, handed to the resolver
+	archiveResolver archive.ResolverFn // loads the archived records
+}
+
+// cache reports no cached hash and always flags the node as dirty.
+func (n *expiredNode) cache() (hashNode, bool) {
+	return nil, true
+}
+
+// encode writes the node as the marker byte followed by the offset and the
+// size, each as an 8-byte big-endian integer. The bytes are written as-is,
+// without an RLP header.
+func (n *expiredNode) encode(w rlp.EncoderBuffer) {
+	var buf [1 + 2*archive.OffsetSize]byte
+	buf[0] = expiredNodeMarker
+	binary.BigEndian.PutUint64(buf[1:], n.offset)
+	binary.BigEndian.PutUint64(buf[1+archive.OffsetSize:], n.size)
+	w.Write(buf[:])
+}
+
+// fstring returns a one-line description of the node carrying its offset
+// and size. The indentation argument is unused.
+//
+// NOTE(review): the previous format string contained no verbs for its two
+// arguments; confirm the exact layout wanted here and keep
+// TestExpiredNodeFstring in sync with it.
+func (n *expiredNode) fstring(ind string) string {
+	return fmt.Sprintf("<expired: offset=%d size=%d> ", n.offset, n.size)
+}
+
+// Offset returns the archive file offset for this expired node.
+func (n *expiredNode) Offset() uint64 {
+	return n.offset
+}
+
+// SetArchiveResolver sets the resolver function for this expired node.
+func (n *expiredNode) SetArchiveResolver(resolver archive.ResolverFn) {
+	n.archiveResolver = resolver
+}
+
+// archiveRecordsToNode rebuilds a subtree from a list of archive records.
+//
+// A single record is decoded directly and its path is ignored. With several
+// records, each record's Path is walked from a fresh full node, one child
+// index per path byte, creating intermediate full nodes on the way, and the
+// decoded node is attached where the path ends.
+//
+// It returns archive.EmptyArchiveRecord when no records are supplied, and an
+// error when a record cannot be decoded, carries no path in a multi-record
+// list, contains a path byte outside the child array, or collides with a
+// node placed by an earlier record.
+func archiveRecordsToNode(records []*archive.Record) (node, error) {
+	if len(records) == 0 {
+		return nil, archive.EmptyArchiveRecord
+	}
+	if len(records) == 1 {
+		return decodeNodeUnsafe(nil, records[0].Value)
+	}
+
+	root := &fullNode{}
+	for _, record := range records {
+		if len(record.Path) == 0 {
+			return nil, fmt.Errorf("archive record without a path in a multi-record subtree")
+		}
+		resolved, err := decodeNodeUnsafe(nil, record.Value)
+		if err != nil {
+			return nil, err
+		}
+		// It's not needed to resurrect all nodes, nodes
+		// not along the path of what has been asked can
+		// be updated as expired. This is for v2.
+		cur := root
+		last := len(record.Path) - 1
+		for i, b := range record.Path {
+			// Guard the array index: the path comes from the archive file.
+			if int(b) >= len(cur.Children) {
+				return nil, fmt.Errorf("invalid path element %d in archive record", b)
+			}
+			if i == last {
+				if cur.Children[b] != nil {
+					return nil, fmt.Errorf("conflicting archive records at path %x", record.Path)
+				}
+				cur.Children[b] = resolved
+				break
+			}
+			if cur.Children[b] == nil {
+				cur.Children[b] = &fullNode{}
+			}
+			// Step into the child so the next path byte applies one level down.
+			next, ok := cur.Children[b].(*fullNode)
+			if !ok {
+				return nil, fmt.Errorf("conflicting archive records at path %x", record.Path[:i+1])
+			}
+			cur = next
+		}
+	}
+	return root, nil
+}
diff --git a/trie/expired_node_test.go b/trie/expired_node_test.go
new file mode 100644
index 0000000000..4b4267ba37
--- /dev/null
+++ b/trie/expired_node_test.go
@@ -0,0 +1,491 @@
+// Copyright 2026 go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+ +package trie + +import ( + "bytes" + "errors" + "testing" + + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie/archive" +) + +func TestExpiredNodeEncodeDecode(t *testing.T) { + testCases := []struct { + offset uint64 + size uint64 + }{ + {0, 0}, + {1, 100}, + {255, 1024}, + {256, 4096}, + {1 << 16, 1 << 20}, + {1 << 32, 1 << 32}, + {1<<64 - 1, 1<<64 - 1}, + } + + for _, tc := range testCases { + original := &expiredNode{offset: tc.offset, size: tc.size} + + w := rlp.NewEncoderBuffer(nil) + original.encode(w) + encoded := w.ToBytes() + w.Flush() + + decoded, err := decodeNodeUnsafe(nil, encoded) + if err != nil { + t.Fatalf("failed to decode expired node with offset %d, size %d: %v", tc.offset, tc.size, err) + } + + expNode, ok := decoded.(*expiredNode) + if !ok { + t.Fatalf("decoded node is not an expired node, got %T", decoded) + } + + if expNode.offset != original.offset { + t.Errorf("offset mismatch: got %d, want %d", expNode.offset, original.offset) + } + if expNode.size != original.size { + t.Errorf("size mismatch: got %d, want %d", expNode.size, original.size) + } + } +} + +func TestExpiredNodeEncodedFormat(t *testing.T) { + node := &expiredNode{offset: 0x0102030405060708, size: 0x1112131415161718} + + w := rlp.NewEncoderBuffer(nil) + node.encode(w) + encoded := w.ToBytes() + w.Flush() + + expected := []byte{ + 0x00, + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, + } + if !bytes.Equal(encoded, expected) { + t.Errorf("encoded format mismatch: got %x, want %x", encoded, expected) + } +} + +func TestExpiredNodeFstring(t *testing.T) { + node := &expiredNode{offset: 12345, size: 6789} + s := node.fstring("") + if s != " " { + t.Errorf("fstring mismatch: got %q", s) + } +} + +func TestExpiredNodeCache(t *testing.T) { + node := &expiredNode{offset: 100} + hash, dirty := node.cache() + if hash != nil { + t.Error("expected nil hash from expired node cache") + } + if !dirty { + 
t.Error("expected dirty=true from expired node cache") + } +} + +func TestExpiredNodeInvalidLength(t *testing.T) { + invalidCases := [][]byte{ + {0x00}, + {0x00, 0x01}, + {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, + {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f}, + {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11}, + } + + for _, buf := range invalidCases { + _, err := decodeNodeUnsafe(nil, buf) + if err == nil { + t.Errorf("expected error for buffer length %d, got nil", len(buf)) + } + } +} + +func TestExpiredNodeNoResolver(t *testing.T) { + tr := NewEmpty(nil) + tr.root = &expiredNode{offset: 100} + + _, err := tr.Get([]byte("key")) + if !errors.Is(err, archive.ErrNoResolver) { + t.Errorf("expected archive.ErrNoResolver, got %v", err) + } +} + +func TestExpiredNodeWithResolver(t *testing.T) { + tr := NewEmpty(nil) + + leafNode := &shortNode{ + Key: hexToCompact(keybytesToHex([]byte{0x12})), + Val: valueNode([]byte("testvalue")), + } + encodedLeaf := nodeToBytes(leafNode) + + resolver := func(offset, size uint64) ([]*archive.Record, error) { + if offset == 100 { + return []*archive.Record{{Value: encodedLeaf}}, nil + } + return nil, errors.New("unknown offset") + } + + tr.SetArchiveResolver(resolver) + tr.root = &expiredNode{offset: 100, size: uint64(len(encodedLeaf)), archiveResolver: resolver} + + val, err := tr.Get([]byte{0x12}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if string(val) != "testvalue" { + t.Errorf("value mismatch: got %q, want %q", val, "testvalue") + } +} + +func TestExpiredNodeCopy(t *testing.T) { + resolver := func(offset, size uint64) ([]*archive.Record, error) { + return nil, nil + } + + original := &expiredNode{ + offset: 12345, + size: 6789, + archiveResolver: resolver, + } + + copied := copyNode(original) + copiedExp, ok := copied.(*expiredNode) + if !ok { + t.Fatalf("copied node is not an expired 
node, got %T", copied) + } + + if copiedExp.offset != original.offset { + t.Errorf("offset mismatch: got %d, want %d", copiedExp.offset, original.offset) + } + + if copiedExp.size != original.size { + t.Errorf("size mismatch: got %d, want %d", copiedExp.size, original.size) + } + + if copiedExp.archiveResolver == nil { + t.Error("archive resolver was not copied") + } +} + +func TestArchiveRecordsToNodeEmpty(t *testing.T) { + _, err := archiveRecordsToNode([]*archive.Record{}) + if !errors.Is(err, archive.EmptyArchiveRecord) { + t.Errorf("expected EmptyArchiveRecord error, got %v", err) + } + + _, err = archiveRecordsToNode(nil) + if !errors.Is(err, archive.EmptyArchiveRecord) { + t.Errorf("expected EmptyArchiveRecord error for nil slice, got %v", err) + } +} + +func TestArchiveRecordsToNodeMultiple(t *testing.T) { + leaf1 := &shortNode{ + Key: hexToCompact(keybytesToHex([]byte{0x10})), + Val: valueNode([]byte("value1")), + } + leaf2 := &shortNode{ + Key: hexToCompact(keybytesToHex([]byte{0x20})), + Val: valueNode([]byte("value2")), + } + + records := []*archive.Record{ + {Path: []byte{0x01}, Value: nodeToBytes(leaf1)}, + {Path: []byte{0x02}, Value: nodeToBytes(leaf2)}, + } + + node, err := archiveRecordsToNode(records) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + fn, ok := node.(*fullNode) + if !ok { + t.Fatalf("expected fullNode, got %T", node) + } + + if fn.Children[0x01] == nil { + t.Error("expected child at index 0x01") + } + if fn.Children[0x02] == nil { + t.Error("expected child at index 0x02") + } +} + +func TestExpiredNodeGetMultipleRecords(t *testing.T) { + leaf1 := &shortNode{ + Key: hexToCompact([]byte{0x02, 0x03, 0x04, 16}), + Val: valueNode([]byte("value1")), + } + leaf2 := &shortNode{ + Key: hexToCompact([]byte{0x05, 0x06, 0x07, 16}), + Val: valueNode([]byte("value2")), + } + + resolver := func(offset, size uint64) ([]*archive.Record, error) { + return []*archive.Record{ + {Path: []byte{0x01}, Value: nodeToBytes(leaf1)}, + {Path: 
[]byte{0x04}, Value: nodeToBytes(leaf2)}, + }, nil + } + + tr := NewEmpty(nil) + tr.SetArchiveResolver(resolver) + tr.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver} + + val, err := tr.Get([]byte{0x12, 0x34}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if string(val) != "value1" { + t.Errorf("value mismatch: got %q, want %q", val, "value1") + } + + tr2 := NewEmpty(nil) + tr2.SetArchiveResolver(resolver) + tr2.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver} + + val2, err := tr2.Get([]byte{0x45, 0x67}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if string(val2) != "value2" { + t.Errorf("value mismatch: got %q, want %q", val2, "value2") + } +} + +func TestExpiredNodeGetKeyNotFound(t *testing.T) { + leaf := &shortNode{ + Key: hexToCompact(keybytesToHex([]byte{0x12})), + Val: valueNode([]byte("value1")), + } + + resolver := func(offset, size uint64) ([]*archive.Record, error) { + return []*archive.Record{ + {Path: []byte{0x01}, Value: nodeToBytes(leaf)}, + }, nil + } + + tr := NewEmpty(nil) + tr.SetArchiveResolver(resolver) + tr.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver} + + val, err := tr.Get([]byte{0xff, 0xff}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if val != nil { + t.Errorf("expected nil value for non-existent key, got %q", val) + } +} + +func TestExpiredNodeGetPathMismatch(t *testing.T) { + leaf := &shortNode{ + Key: hexToCompact(keybytesToHex([]byte{0x12})), + Val: valueNode([]byte("testvalue")), + } + + resolver := func(offset, size uint64) ([]*archive.Record, error) { + return []*archive.Record{ + {Path: []byte{0x01}, Value: nodeToBytes(leaf)}, + }, nil + } + + tr := NewEmpty(nil) + tr.SetArchiveResolver(resolver) + tr.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver} + + val, err := tr.Get([]byte{0x19}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if val != nil { + t.Errorf("expected 
nil value when leaf key doesn't match, got %q", val) + } +} + +func TestExpiredNodeInsert(t *testing.T) { + leaf := &shortNode{ + Key: hexToCompact(keybytesToHex([]byte{0x12})), + Val: valueNode([]byte("existing")), + } + + resolver := func(offset, size uint64) ([]*archive.Record, error) { + return []*archive.Record{ + {Path: []byte{}, Value: nodeToBytes(leaf)}, + }, nil + } + + tr := NewEmpty(nil) + tr.SetArchiveResolver(resolver) + tr.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver} + + err := tr.Update([]byte{0x45}, []byte("newvalue")) + if err != nil { + t.Fatalf("unexpected error on insert: %v", err) + } + + val, err := tr.Get([]byte{0x45}) + if err != nil { + t.Fatalf("unexpected error on get: %v", err) + } + if string(val) != "newvalue" { + t.Errorf("value mismatch: got %q, want %q", val, "newvalue") + } +} + +func TestExpiredNodeUpdate(t *testing.T) { + leaf := &shortNode{ + Key: hexToCompact(keybytesToHex([]byte{0x12})), + Val: valueNode([]byte("oldvalue")), + } + + resolver := func(offset, size uint64) ([]*archive.Record, error) { + return []*archive.Record{ + {Path: []byte{}, Value: nodeToBytes(leaf)}, + }, nil + } + + tr := NewEmpty(nil) + tr.SetArchiveResolver(resolver) + tr.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver} + + err := tr.Update([]byte{0x12}, []byte("newvalue")) + if err != nil { + t.Fatalf("unexpected error on update: %v", err) + } + + val, err := tr.Get([]byte{0x12}) + if err != nil { + t.Fatalf("unexpected error on get: %v", err) + } + if string(val) != "newvalue" { + t.Errorf("value mismatch: got %q, want %q", val, "newvalue") + } +} + +func TestExpiredNodeDelete(t *testing.T) { + leaf1 := &shortNode{ + Key: hexToCompact([]byte{0x02, 16}), + Val: valueNode([]byte("value1")), + } + leaf2 := &shortNode{ + Key: hexToCompact([]byte{0x05, 16}), + Val: valueNode([]byte("value2")), + } + + branch := &fullNode{} + branch.Children[0x01] = leaf1 + branch.Children[0x04] = leaf2 + + resolver := 
func(offset, size uint64) ([]*archive.Record, error) { + return []*archive.Record{ + {Path: []byte{}, Value: nodeToBytes(branch)}, + }, nil + } + + tr := NewEmpty(nil) + tr.SetArchiveResolver(resolver) + tr.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver} + + err := tr.Delete([]byte{0x12}) + if err != nil { + t.Fatalf("unexpected error on delete: %v", err) + } + + val, err := tr.Get([]byte{0x12}) + if err != nil { + t.Fatalf("unexpected error on get after delete: %v", err) + } + if val != nil { + t.Errorf("expected nil after delete, got %q", val) + } + + val2, err := tr.Get([]byte{0x45}) + if err != nil { + t.Fatalf("unexpected error getting other key: %v", err) + } + if string(val2) != "value2" { + t.Errorf("other value should still exist: got %q, want %q", val2, "value2") + } +} + +func TestTrieCopyPreservesArchiveResolver(t *testing.T) { + leaf := &shortNode{ + Key: hexToCompact(keybytesToHex([]byte{0x12})), + Val: valueNode([]byte("testvalue")), + } + + resolverCalled := false + resolver := func(offset, size uint64) ([]*archive.Record, error) { + resolverCalled = true + return []*archive.Record{ + {Path: []byte{}, Value: nodeToBytes(leaf)}, + }, nil + } + + tr := NewEmpty(nil) + tr.SetArchiveResolver(resolver) + tr.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver} + + trCopy := tr.Copy() + + val, err := trCopy.Get([]byte{0x12}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !resolverCalled { + t.Error("resolver was not called on copied trie") + } + if string(val) != "testvalue" { + t.Errorf("value mismatch: got %q, want %q", val, "testvalue") + } +} + +func TestExpiredNodeGetNode(t *testing.T) { + leaf := &shortNode{ + Key: hexToCompact(keybytesToHex([]byte{0x12})), + Val: valueNode([]byte("testvalue")), + } + + resolverCalled := false + resolver := func(offset, size uint64) ([]*archive.Record, error) { + resolverCalled = true + return []*archive.Record{ + {Path: []byte{}, Value: nodeToBytes(leaf)}, 
+ }, nil + } + + tr := NewEmpty(nil) + tr.SetArchiveResolver(resolver) + tr.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver} + + _, _, err := tr.GetNode(hexToCompact([]byte{0x01, 0x02})) + if !resolverCalled { + t.Error("resolver was not called during GetNode") + } + if err != nil && err.Error() != "non-consensus node" { + t.Fatalf("unexpected error: %v", err) + } +} diff --git a/trie/node.go b/trie/node.go index b5094ff4b7..2556ba9f81 100644 --- a/trie/node.go +++ b/trie/node.go @@ -18,6 +18,7 @@ package trie import ( "bytes" + "encoding/binary" "fmt" "io" "strings" @@ -25,6 +26,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie/archive" ) var indices = []string{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f", "[17]"} @@ -158,6 +160,14 @@ func decodeNodeUnsafe(hash, buf []byte) (node, error) { if len(buf) == 0 { return nil, io.ErrUnexpectedEOF } + if buf[0] == expiredNodeMarker { + if len(buf) != 1+2*archive.OffsetSize { + return nil, fmt.Errorf("invalid expired node length: %d", len(buf)) + } + offset := binary.BigEndian.Uint64(buf[1:]) + size := binary.BigEndian.Uint64(buf[1+archive.OffsetSize:]) + return &expiredNode{offset: offset, size: size, archiveResolver: archive.ArchivedNodeResolver}, nil + } elems, _, err := rlp.SplitList(buf) if err != nil { return nil, fmt.Errorf("decode error: %v", err) diff --git a/trie/trie.go b/trie/trie.go index 1ef2c2f1a6..1c9c372b58 100644 --- a/trie/trie.go +++ b/trie/trie.go @@ -26,6 +26,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/trie/archive" "github.com/ethereum/go-ethereum/trie/trienode" "github.com/ethereum/go-ethereum/triedb/database" "golang.org/x/sync/errgroup" @@ -57,6 +58,10 @@ type Trie struct { // reader is the 
handler trie can retrieve nodes from. reader *Reader + // archiveResolver is an optional callback to resolve expired nodes from + // an archive file. + archiveResolver archive.ResolverFn + // Various tracers for capturing the modifications to trie opTracer *opTracer prevalueTracer *PrevalueTracer @@ -70,17 +75,23 @@ func (t *Trie) newFlag() nodeFlag { // Copy returns a copy of Trie. func (t *Trie) Copy() *Trie { return &Trie{ - root: copyNode(t.root), - owner: t.owner, - committed: t.committed, - unhashed: t.unhashed, - uncommitted: t.uncommitted, - reader: t.reader, - opTracer: t.opTracer.copy(), - prevalueTracer: t.prevalueTracer.Copy(), + root: copyNode(t.root), + owner: t.owner, + committed: t.committed, + unhashed: t.unhashed, + uncommitted: t.uncommitted, + reader: t.reader, + archiveResolver: t.archiveResolver, + opTracer: t.opTracer.copy(), + prevalueTracer: t.prevalueTracer.Copy(), } } +// SetArchiveResolver sets the archive resolver callback for expired nodes. +func (t *Trie) SetArchiveResolver(resolver archive.ResolverFn) { + t.archiveResolver = resolver +} + // New creates the trie instance with provided trie id and the read-only // database. The state specified by trie id must be available, otherwise // an error will be returned. 
The trie root specified by trie id can be @@ -218,6 +229,31 @@ func (t *Trie) get(origNode node, key []byte, pos int) (value []byte, newnode no } value, newnode, _, err := t.get(child, key, pos) return value, newnode, true, err + case *expiredNode: + if t.archiveResolver == nil { + return nil, n, false, archive.ErrNoResolver + } + records, err := t.archiveResolver(n.offset, n.size) + if err != nil { + return nil, n, false, fmt.Errorf("failed to resolve expired node: %w", err) + } + newnode, err := archiveRecordsToNode(records) + for _, record := range records { + // make sure that the path up to the node matches + if bytes.HasPrefix(key[pos:], record.Path) { + resolved, err := decodeNodeUnsafe(nil, record.Value) + if err != nil { + return nil, n, false, fmt.Errorf("failed to deserialize RLP node: %w", err) + } + if leaf, ok := resolved.(*shortNode); ok { + // make sure that the key to the leaf also matches + if bytes.Equal(key[pos+len(record.Path):], leaf.Key) { + return leaf.Val.(valueNode), newnode, true, nil + } + } + } + } + return value, newnode, false, err default: panic(fmt.Sprintf("%T: invalid node: %v", origNode, origNode)) } @@ -352,6 +388,18 @@ func (t *Trie) getNode(origNode node, path []byte, pos int) (item []byte, newnod item, newnode, resolved, err := t.getNode(child, path, pos) return item, newnode, resolved + 1, err + case *expiredNode: + if t.archiveResolver == nil { + return nil, n, 0, archive.ErrNoResolver + } + records, err := t.archiveResolver(n.offset, n.size) + if err != nil { + return nil, n, 0, fmt.Errorf("failed to resolve expired node: %w", err) + } + newnode, err := archiveRecordsToNode(records) + item, newnode, resolvedCount, err := t.getNode(newnode, path, pos) + return item, newnode, resolvedCount + 1, err + default: panic(fmt.Sprintf("%T: invalid node: %v", origNode, origNode)) } @@ -475,6 +523,21 @@ func (t *Trie) insert(n node, prefix, key []byte, value node) (bool, node, error } return true, nn, nil + case *expiredNode: + if 
t.archiveResolver == nil { + return false, nil, archive.ErrNoResolver + } + records, err := t.archiveResolver(n.offset, n.size) + if err != nil { + return false, nil, fmt.Errorf("failed to resolve expired node: %w", err) + } + nn, err := archiveRecordsToNode(records) + if err != nil { + return false, nil, fmt.Errorf("failed to rebuild expired node from archive: %w", err) + } + dirty, nn, err := t.insert(nn, prefix, key, value) + return dirty && err == nil, nn, err + default: panic(fmt.Sprintf("%T: invalid node: %v", n, n)) } @@ -636,6 +699,21 @@ func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) { } return true, nn, nil + case *expiredNode: + if t.archiveResolver == nil { + return false, nil, archive.ErrNoResolver + } + records, err := t.archiveResolver(n.offset, n.size) + if err != nil { + return false, nil, fmt.Errorf("failed to resolve expired node: %w", err) + } + nn, err := archiveRecordsToNode(records) + if err != nil { + return false, nil, fmt.Errorf("failed to rebuild expired node from archive: %w", err) + } + dirty, _, err := t.delete(nn, prefix, key) + return dirty && err == nil, nn, err + default: panic(fmt.Sprintf("%T: invalid node: %v (%v)", n, n, key)) } @@ -666,6 +744,12 @@ func copyNode(n node) node { } case hashNode: return n + case *expiredNode: + return &expiredNode{ + offset: n.offset, + size: n.size, + archiveResolver: n.archiveResolver, + } default: panic(fmt.Sprintf("%T: unknown node type", n)) } From d82e83d2d9091e8306d3dac8a00feaeddbfafeb5 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet <3272758+gballet@users.noreply.github.com> Date: Sun, 25 Jan 2026 09:05:07 +0100 Subject: [PATCH 2/9] trie, cmd/geth: add archiver command --- cmd/geth/archivecmd.go | 201 ++++++++++++++++++++ cmd/geth/main.go | 2 + trie/archive/writer.go | 92 ++++++++++ trie/archiver.go | 403 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 698 insertions(+) create mode 100644 cmd/geth/archivecmd.go create mode 100644 trie/archive/writer.go 
create mode 100644 trie/archiver.go diff --git a/cmd/geth/archivecmd.go b/cmd/geth/archivecmd.go new file mode 100644 index 0000000000..12712b99dd --- /dev/null +++ b/cmd/geth/archivecmd.go @@ -0,0 +1,201 @@ +// Copyright 2026 The go-ethereum Authors +// This file is part of go-ethereum. +// +// go-ethereum is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-ethereum is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-ethereum. If not, see <http://www.gnu.org/licenses/>. + +package main + +import ( + "errors" + "fmt" + "path/filepath" + "slices" + "time" + + "github.com/ethereum/go-ethereum/cmd/utils" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/archive" + "github.com/urfave/cli/v2" +) + +var ( + // Flags for the archive command + archiveOutputFlag = &cli.StringFlag{ + Name: "output", + Usage: "Path to archive output file", + Value: "", // Default: filepath.Join(stack.ResolvePath(""), "nodearchive") + } + archiveCompactionIntervalFlag = &cli.Uint64Flag{ + Name: "compaction-interval", + Usage: "Run compaction after this many subtrees (0 = disable)", + Value: 1000, + } + archiveDryRunFlag = &cli.BoolFlag{ + Name: "dry-run", + Usage: "Simulate without modifying database", + } + + // Commands + archiveCommand = &cli.Command{ + Name: "archive", + Usage: "Archive state trie nodes to reduce database size", + Subcommands: []*cli.Command{ + archiveGenerateCmd, + }, + } + +
archiveGenerateCmd = &cli.Command{ + Name: "generate", + Usage: "Generate archive files from height-3 subtrees", + ArgsUsage: "[state-root]", + Action: archiveGenerate, + Flags: slices.Concat([]cli.Flag{ + archiveOutputFlag, + archiveCompactionIntervalFlag, + archiveDryRunFlag, + }, utils.NetworkFlags, utils.DatabaseFlags), + Description: ` +Walks the state trie of the specified root (or head block) and archives +subtrees at height 3. Each archived subtree is replaced with an expiredNode +that references the archive file offset and size. + +Height is measured from leaves: leaves=0, parents=1, etc. A height-3 node +has leaves at most 3 levels below it. + +Examples: + # Archive from head state + geth archive generate --datadir /path/to/datadir + + # Dry run to see what would be archived + geth archive generate --dry-run --datadir /path/to/datadir + + # Archive from a specific state root + geth archive generate 0x1234...abcd --datadir /path/to/datadir + + # Custom output and compaction interval + geth archive generate --output /path/to/archive --compaction-interval 500 +`, + } +) + +func archiveGenerate(ctx *cli.Context) error { + // 1. Setup node and databases + stack, _ := makeConfigNode(ctx) + defer stack.Close() + + // Open database in write mode (readOnly=false) unless dry-run + dryRun := ctx.Bool(archiveDryRunFlag.Name) + chaindb := utils.MakeChainDatabase(ctx, stack, dryRun) + defer chaindb.Close() + + // Check state scheme - we only support PathDB + scheme := cycleCheckScheme(ctx, chaindb) + if scheme != rawdb.PathScheme { + return fmt.Errorf("archive generation requires path-based state scheme, got: %s", scheme) + } + + triedb := utils.MakeTrieDatabase(ctx, stack, chaindb, false, false, false) + defer triedb.Close() + + // 2. 
Determine state root + var root common.Hash + if ctx.NArg() > 0 { + root = common.HexToHash(ctx.Args().First()) + log.Info("Using specified state root", "root", root) + } else { + headBlock := rawdb.ReadHeadBlock(chaindb) + if headBlock == nil { + return errors.New("no head block found - specify a state root or sync the chain first") + } + root = headBlock.Root() + log.Info("Using head block state", "number", headBlock.NumberU64(), "root", root) + } + + // Verify the state exists + if !rawdb.HasAccountTrieNode(chaindb, nil) { + return errors.New("state trie not found in database") + } + + // 3. Open archive writer (unless dry-run) + var writer *archive.ArchiveWriter + archivePath := ctx.String(archiveOutputFlag.Name) + if archivePath == "" { + archivePath = filepath.Join(stack.ResolvePath(""), "nodearchive") + } + + if !dryRun { + var err error + writer, err = archive.NewArchiveWriter(archivePath) + if err != nil { + return fmt.Errorf("failed to open archive file %s: %w", archivePath, err) + } + defer writer.Close() + log.Info("Opened archive file", "path", archivePath) + } else { + log.Info("Dry run mode - no changes will be made") + } + + // 4. Create and run archiver + archiver := trie.NewArchiver( + chaindb, + triedb, + writer, + ctx.Uint64(archiveCompactionIntervalFlag.Name), + dryRun, + ) + + start := time.Now() + if err := archiver.ProcessState(root); err != nil { + return fmt.Errorf("archive generation failed: %w", err) + } + + // 5. Get stats and optionally run final compaction + subtrees, leaves, bytesDeleted := archiver.Stats() + + if !dryRun && subtrees > 0 { + log.Info("Running final database compaction") + if err := chaindb.Compact(nil, nil); err != nil { + log.Warn("Final compaction failed", "err", err) + } + } + + // 6. 
Print summary + var archiveSize uint64 + if writer != nil { + archiveSize = writer.Offset() + } + + log.Info("Archive generation complete", + "subtrees", subtrees, + "leaves", leaves, + "bytesDeleted", bytesDeleted, + "archiveSize", archiveSize, + "elapsed", common.PrettyDuration(time.Since(start))) + + if dryRun { + log.Info("This was a dry run - no changes were made to the database") + } + + return nil +} + +// cycleCheckScheme returns the state scheme for the database. +// It's a helper to check what scheme is in use. +func cycleCheckScheme(ctx *cli.Context, db ethdb.Database) string { + return rawdb.ReadStateScheme(db) +} diff --git a/cmd/geth/main.go b/cmd/geth/main.go index e547256e00..0c9e71c8a1 100644 --- a/cmd/geth/main.go +++ b/cmd/geth/main.go @@ -239,6 +239,8 @@ func init() { dumpConfigCommand, // see dbcmd.go dbCommand, + // See archivecmd.go + archiveCommand, // See cmd/utils/flags_legacy.go utils.ShowDeprecated, // See snapshot.go diff --git a/trie/archive/writer.go b/trie/archive/writer.go new file mode 100644 index 0000000000..98b4ecce4b --- /dev/null +++ b/trie/archive/writer.go @@ -0,0 +1,92 @@ +// Copyright 2026 go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+ +package archive + +import ( + "os" + "sync" + + "github.com/ethereum/go-ethereum/rlp" +) + +// ArchiveWriter is an append-only writer for archive files. +// It writes RLP-encoded records to a file and tracks the current offset. +type ArchiveWriter struct { + file *os.File + offset uint64 + mu sync.Mutex +} + +// NewArchiveWriter creates a new archive writer that appends to the given file. +// If the file exists, it will be opened in append mode and writing continues +// from the current end of file. If it doesn't exist, it will be created. +func NewArchiveWriter(path string) (*ArchiveWriter, error) { + file, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return nil, err + } + info, err := file.Stat() + if err != nil { + file.Close() + return nil, err + } + return &ArchiveWriter{ + file: file, + offset: uint64(info.Size()), + }, nil +} + +// WriteSubtree writes all records belonging to a subtree and returns +// the starting offset and total size of the written data. +// This is the atomic unit of archival - all records for a subtree are +// written together and can be retrieved together using the returned +// offset and size. +func (w *ArchiveWriter) WriteSubtree(records []*Record) (offset uint64, size uint64, err error) { + w.mu.Lock() + defer w.mu.Unlock() + + startOffset := w.offset + for _, rec := range records { + encoded, err := rlp.EncodeToBytes(rec) + if err != nil { + return 0, 0, err + } + if _, err := w.file.Write(encoded); err != nil { + return 0, 0, err + } + w.offset += uint64(len(encoded)) + } + return startOffset, w.offset - startOffset, nil +} + +// Sync flushes the file to disk. This should be called after writing +// a subtree and before modifying the database to ensure crash consistency. +func (w *ArchiveWriter) Sync() error { + return w.file.Sync() +} + +// Close closes the archive file. 
+func (w *ArchiveWriter) Close() error { + return w.file.Close() +} + +// Offset returns the current write offset in the file. +func (w *ArchiveWriter) Offset() uint64 { + w.mu.Lock() + defer w.mu.Unlock() + return w.offset +} diff --git a/trie/archiver.go b/trie/archiver.go new file mode 100644 index 0000000000..0814da10e8 --- /dev/null +++ b/trie/archiver.go @@ -0,0 +1,403 @@ +// Copyright 2026 go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package trie + +import ( + "encoding/binary" + "fmt" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie/archive" + "github.com/ethereum/go-ethereum/triedb/database" +) + +// subtreeInfo holds information about a subtree to be archived. +// It contains all the data needed to write the subtree to an archive +// and replace it with an expiredNode in the database.
+type subtreeInfo struct { + path []byte // Hex-encoded path to subtree root + owner common.Hash // Zero for account trie, account hash for storage + height int // Height of subtree (from leaves) + leaves []*archive.Record // All leaf records (relative path + encoded node) + nodePaths [][]byte // Paths of all nodes to delete +} + +// Archiver handles the archival process of trie nodes. +// It walks the state trie, identifies subtrees at height 3, +// archives their leaf data, and replaces them with expiredNode markers. +type Archiver struct { + db ethdb.Database + triedb database.NodeDatabase + writer *archive.ArchiveWriter + compactionInterval uint64 + dryRun bool + stateRoot common.Hash + + // Progress tracking + subtreesArchived uint64 + bytesDeleted uint64 + leavesArchived uint64 + lastCompaction uint64 +} + +// NewArchiver creates a new archiver instance. +// +// Parameters: +// - db: The underlying key-value database +// - triedb: The trie database for reading nodes +// - writer: Archive file writer (can be nil for dry run) +// - compactionInterval: Run compaction after this many subtrees (0 = disable) +// - dryRun: If true, don't modify the database +func NewArchiver(db ethdb.Database, triedb database.NodeDatabase, + writer *archive.ArchiveWriter, compactionInterval uint64, dryRun bool) *Archiver { + return &Archiver{ + db: db, + triedb: triedb, + writer: writer, + compactionInterval: compactionInterval, + dryRun: dryRun, + } +} + +// ProcessState archives subtrees from the given state root. +// It processes the account trie first, then all storage tries. 
+func (a *Archiver) ProcessState(root common.Hash) error { + a.stateRoot = root + + // Process account trie (owner = zero hash) + log.Info("Processing account trie", "root", root) + accountTrie, err := New(StateTrieID(root), a.triedb) + if err != nil { + return fmt.Errorf("failed to open account trie: %w", err) + } + + if err := a.processTrie(common.Hash{}, accountTrie); err != nil { + return fmt.Errorf("failed to process account trie: %w", err) + } + + // Process storage tries for accounts with storage + log.Info("Processing storage tries") + iter, err := accountTrie.NodeIterator(nil) + if err != nil { + return fmt.Errorf("failed to create account iterator: %w", err) + } + + kvIter := NewIterator(iter) + for kvIter.Next() { + // Decode the account to check for storage + var acc types.StateAccount + if err := rlp.DecodeBytes(kvIter.Value, &acc); err != nil { + log.Warn("Failed to decode account", "err", err) + continue + } + if acc.Root == types.EmptyRootHash { + continue + } + + // Process this account's storage trie + accountHash := common.BytesToHash(kvIter.Key) + storageID := StorageTrieID(root, accountHash, acc.Root) + storageTrie, err := New(storageID, a.triedb) + if err != nil { + log.Warn("Failed to open storage trie", "account", accountHash, "err", err) + continue + } + + if err := a.processTrie(accountHash, storageTrie); err != nil { + log.Warn("Failed to process storage trie", "account", accountHash, "err", err) + } + } + + if kvIter.Err != nil { + return fmt.Errorf("account iteration error: %w", kvIter.Err) + } + + return nil +} + +// processTrie finds and archives all height-3 subtrees in the trie. 
+func (a *Archiver) processTrie(owner common.Hash, t *Trie) error { + if t.root == nil { + return nil + } + + subtrees := a.findHeight3Subtrees(t.root, nil, owner) + log.Info("Found subtrees to archive", "owner", owner, "count", len(subtrees)) + + for _, info := range subtrees { + if err := a.archiveSubtree(info); err != nil { + log.Warn("Failed to archive subtree", "path", common.Bytes2Hex(info.path), "err", err) + continue + } + a.subtreesArchived++ + a.leavesArchived += uint64(len(info.leaves)) + + if err := a.maybeCompact(); err != nil { + log.Warn("Compaction failed", "err", err) + } + } + return nil +} + +// findHeight3Subtrees recursively finds all subtrees with height == 3. +// Height is measured from leaves: leaves=0, their parents=1, etc. +func (a *Archiver) findHeight3Subtrees(n node, path []byte, owner common.Hash) []*subtreeInfo { + info := a.computeSubtreeInfo(n, path, owner) + if info == nil { + return nil + } + + // If this subtree has height 3, it's a candidate for archival + if info.height == 3 { + return []*subtreeInfo{info} + } + + // If height > 3, recurse into children to find height-3 subtrees + if info.height > 3 { + var results []*subtreeInfo + switch n := n.(type) { + case *fullNode: + for i, child := range n.Children[:16] { + if child != nil { + childPath := append(append([]byte{}, path...), byte(i)) + results = append(results, a.findHeight3Subtrees(child, childPath, owner)...) + } + } + case *shortNode: + childPath := append(append([]byte{}, path...), n.Key...) + results = append(results, a.findHeight3Subtrees(n.Val, childPath, owner)...) + case hashNode: + // Resolve and recurse + resolved, err := a.resolveNode(n, path, owner) + if err == nil { + results = append(results, a.findHeight3Subtrees(resolved, path, owner)...) + } + } + return results + } + + // Height < 3: no archivable subtrees here + return nil +} + +// computeSubtreeInfo computes height and collects leaves for a subtree. 
+// Returns nil if the node is nil or an error occurs during resolution. +func (a *Archiver) computeSubtreeInfo(n node, path []byte, owner common.Hash) *subtreeInfo { + switch n := n.(type) { + case nil: + return nil + + case valueNode: + // Leaf: height 0 + // Encode the leaf as a shortNode for archive storage + return &subtreeInfo{ + path: copyBytes(path), + owner: owner, + height: 0, + leaves: []*archive.Record{{ + Path: nil, // Empty relative path for leaf at root + Value: []byte(n), + }}, + nodePaths: [][]byte{copyBytes(path)}, + } + + case *shortNode: + childPath := append(append([]byte{}, path...), n.Key...) + childInfo := a.computeSubtreeInfo(n.Val, childPath, owner) + if childInfo == nil { + return nil + } + + // Adjust relative paths in leaves to include this node's key + for _, leaf := range childInfo.leaves { + leaf.Path = append(append([]byte{}, n.Key...), leaf.Path...) + } + + return &subtreeInfo{ + path: copyBytes(path), + owner: owner, + height: childInfo.height + 1, + leaves: childInfo.leaves, + nodePaths: append([][]byte{copyBytes(path)}, childInfo.nodePaths...), + } + + case *fullNode: + var ( + maxHeight = 0 + allLeaves []*archive.Record + allPaths = [][]byte{copyBytes(path)} + ) + for i, child := range n.Children[:16] { + if child != nil { + childPath := append(append([]byte{}, path...), byte(i)) + childInfo := a.computeSubtreeInfo(child, childPath, owner) + if childInfo != nil { + if childInfo.height+1 > maxHeight { + maxHeight = childInfo.height + 1 + } + // Adjust relative paths to include the branch index + for _, leaf := range childInfo.leaves { + leaf.Path = append([]byte{byte(i)}, leaf.Path...) + } + allLeaves = append(allLeaves, childInfo.leaves...) + allPaths = append(allPaths, childInfo.nodePaths...) 
+ } + } + } + + if len(allLeaves) == 0 { + return nil + } + + return &subtreeInfo{ + path: copyBytes(path), + owner: owner, + height: maxHeight, + leaves: allLeaves, + nodePaths: allPaths, + } + + case hashNode: + resolved, err := a.resolveNode(n, path, owner) + if err != nil { + log.Debug("Failed to resolve hashNode", "path", common.Bytes2Hex(path), "err", err) + return nil + } + return a.computeSubtreeInfo(resolved, path, owner) + + case *expiredNode: + // Already archived, skip + return nil + } + return nil +} + +// archiveSubtree writes leaves to archive and replaces subtree with expiredNode. +func (a *Archiver) archiveSubtree(info *subtreeInfo) error { + if a.dryRun { + log.Info("Would archive subtree", + "path", common.Bytes2Hex(info.path), + "owner", info.owner, + "height", info.height, + "leaves", len(info.leaves), + "nodes", len(info.nodePaths)) + return nil + } + + // 1. Write to archive file + offset, size, err := a.writer.WriteSubtree(info.leaves) + if err != nil { + return fmt.Errorf("failed to write subtree to archive: %w", err) + } + + // 2. Sync to ensure durability before modifying DB + if err := a.writer.Sync(); err != nil { + return fmt.Errorf("failed to sync archive: %w", err) + } + + // 3. 
Batch database operations + batch := a.db.NewBatch() + + // Delete all nodes in subtree (except the root which we'll overwrite) + for _, nodePath := range info.nodePaths[1:] { // Skip first (root) + if info.owner == (common.Hash{}) { + rawdb.DeleteAccountTrieNode(batch, nodePath) + } else { + rawdb.DeleteStorageTrieNode(batch, info.owner, nodePath) + } + a.bytesDeleted += uint64(len(nodePath)) + } + + // Write expiredNode at subtree root + expiredBlob := encodeExpiredNodeBlob(offset, size) + if info.owner == (common.Hash{}) { + rawdb.WriteAccountTrieNode(batch, info.path, expiredBlob) + } else { + rawdb.WriteStorageTrieNode(batch, info.owner, info.path, expiredBlob) + } + + if err := batch.Write(); err != nil { + return fmt.Errorf("failed to write batch: %w", err) + } + + log.Debug("Archived subtree", + "path", common.Bytes2Hex(info.path), + "owner", info.owner, + "leaves", len(info.leaves), + "offset", offset, + "size", size) + + return nil +} + +// maybeCompact runs database compaction if the threshold is reached. +func (a *Archiver) maybeCompact() error { + if a.compactionInterval == 0 { + return nil + } + if a.subtreesArchived-a.lastCompaction >= a.compactionInterval { + log.Info("Running database compaction", "subtrees", a.subtreesArchived) + if err := a.db.Compact(nil, nil); err != nil { + return err + } + a.lastCompaction = a.subtreesArchived + } + return nil +} + +// resolveNode resolves a hashNode to its actual node content. +func (a *Archiver) resolveNode(hash hashNode, path []byte, owner common.Hash) (node, error) { + reader, err := a.triedb.NodeReader(a.stateRoot) + if err != nil { + return nil, err + } + blob, err := reader.Node(owner, path, common.BytesToHash(hash)) + if err != nil { + return nil, err + } + return decodeNodeUnsafe(hash, blob) +} + +// encodeExpiredNodeBlob creates the raw bytes for an expiredNode. 
+// Format: 1-byte marker (0x00) + 8-byte offset + 8-byte size = 17 bytes +func encodeExpiredNodeBlob(offset, size uint64) []byte { + buf := make([]byte, 1+2*archive.OffsetSize) // 17 bytes + buf[0] = expiredNodeMarker // 0x00 + binary.BigEndian.PutUint64(buf[1:], offset) + binary.BigEndian.PutUint64(buf[1+archive.OffsetSize:], size) + return buf +} + +// Stats returns archival statistics. +func (a *Archiver) Stats() (subtrees, leaves, bytesDeleted uint64) { + return a.subtreesArchived, a.leavesArchived, a.bytesDeleted +} + +// copyBytes returns a copy of the given byte slice. +func copyBytes(b []byte) []byte { + if b == nil { + return nil + } + c := make([]byte, len(b)) + copy(c, b) + return c +} From 68621ae05a26cbb34ee5096e1eee8ea336ffea18 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet <3272758+gballet@users.noreply.github.com> Date: Sun, 1 Feb 2026 22:30:11 +0100 Subject: [PATCH 3/9] a few fixes after hoodi testing --- trie/archive/archive.go | 8 +++--- trie/archiver.go | 12 ++++----- trie/trie.go | 58 ++++++++++++++++++----------------------- triedb/pathdb/reader.go | 2 +- 4 files changed, 37 insertions(+), 43 deletions(-) diff --git a/trie/archive/archive.go b/trie/archive/archive.go index 857634b75e..e1129b65fc 100644 --- a/trie/archive/archive.go +++ b/trie/archive/archive.go @@ -71,18 +71,20 @@ func ArchivedNodeResolver(offset, size uint64) ([]*Record, error) { } var records []*Record + stream := rlp.NewStream(bytes.NewReader(data), uint64(len(data))) for len(data) > 0 { - stream := rlp.NewStream(bytes.NewReader(data), uint64(len(data))) _, size, err := stream.Kind() + if err == io.EOF { + break + } if err != nil { return nil, fmt.Errorf("error getting rlp kind from archive data: %w", err) } var record Record - err = rlp.DecodeBytes(data[:size], &record) + err = stream.Decode(&record) if err != nil { return nil, fmt.Errorf("error decoding rlp record from archive data: %w", err) } - data = data[size:] records = append(records, &record) } return records, 
nil diff --git a/trie/archiver.go b/trie/archiver.go index 0814da10e8..b24ac18c4a 100644 --- a/trie/archiver.go +++ b/trie/archiver.go @@ -83,18 +83,11 @@ func NewArchiver(db ethdb.Database, triedb database.NodeDatabase, func (a *Archiver) ProcessState(root common.Hash) error { a.stateRoot = root - // Process account trie (owner = zero hash) - log.Info("Processing account trie", "root", root) accountTrie, err := New(StateTrieID(root), a.triedb) if err != nil { return fmt.Errorf("failed to open account trie: %w", err) } - if err := a.processTrie(common.Hash{}, accountTrie); err != nil { - return fmt.Errorf("failed to process account trie: %w", err) - } - - // Process storage tries for accounts with storage log.Info("Processing storage tries") iter, err := accountTrie.NodeIterator(nil) if err != nil { @@ -131,6 +124,11 @@ func (a *Archiver) ProcessState(root common.Hash) error { return fmt.Errorf("account iteration error: %w", kvIter.Err) } + log.Info("Processing account trie", "root", root) + if err := a.processTrie(common.Hash{}, accountTrie); err != nil { + return fmt.Errorf("failed to process account trie: %w", err) + } + return nil } diff --git a/trie/trie.go b/trie/trie.go index 1c9c372b58..5f2cdcdcfe 100644 --- a/trie/trie.go +++ b/trie/trie.go @@ -230,30 +230,33 @@ func (t *Trie) get(origNode node, key []byte, pos int) (value []byte, newnode no value, newnode, _, err := t.get(child, key, pos) return value, newnode, true, err case *expiredNode: - if t.archiveResolver == nil { - return nil, n, false, archive.ErrNoResolver - } - records, err := t.archiveResolver(n.offset, n.size) + records, err := archive.ArchivedNodeResolver(n.offset, n.size) if err != nil { return nil, n, false, fmt.Errorf("failed to resolve expired node: %w", err) } newnode, err := archiveRecordsToNode(records) - for _, record := range records { - // make sure that the path up to the node matches - if bytes.HasPrefix(key[pos:], record.Path) { - resolved, err := decodeNodeUnsafe(nil, 
record.Value) - if err != nil { - return nil, n, false, fmt.Errorf("failed to deserialize RLP node: %w", err) - } - if leaf, ok := resolved.(*shortNode); ok { - // make sure that the key to the leaf also matches - if bytes.Equal(key[pos+len(record.Path):], leaf.Key) { - return leaf.Val.(valueNode), newnode, true, nil - } - } - } + // alternative: don't rebuild, just find the value + // for _, record := range records { + // // make sure that the path up to the node matches + // if bytes.HasPrefix(key[pos:], record.Path) { + // resolved, err := decodeNodeUnsafe(nil, record.Value) + // if err != nil { + // fmt.Printf("%v %x\n", record.Path, record.Value) + // return nil, n, false, fmt.Errorf("failed to deserialize RLP node: %w", err) + // } + // if leaf, ok := resolved.(*shortNode); ok { + // // make sure that the key to the leaf also matches + // if bytes.Equal(key[pos+len(record.Path):], leaf.Key) { + // return leaf.Val.(valueNode), newnode, true, nil + // } + // } + // } + // } + if err != nil { + return nil, n, false, err } - return value, newnode, false, err + value, _, _, err = t.get(newnode, key, pos+1) + return value, newnode, true, err default: panic(fmt.Sprintf("%T: invalid node: %v", origNode, origNode)) } @@ -389,10 +392,7 @@ func (t *Trie) getNode(origNode node, path []byte, pos int) (item []byte, newnod return item, newnode, resolved + 1, err case *expiredNode: - if t.archiveResolver == nil { - return nil, n, 0, archive.ErrNoResolver - } - records, err := t.archiveResolver(n.offset, n.size) + records, err := archive.ArchivedNodeResolver(n.offset, n.size) if err != nil { return nil, n, 0, fmt.Errorf("failed to resolve expired node: %w", err) } @@ -524,10 +524,7 @@ func (t *Trie) insert(n node, prefix, key []byte, value node) (bool, node, error return true, nn, nil case *expiredNode: - if t.archiveResolver == nil { - return false, nil, archive.ErrNoResolver - } - records, err := t.archiveResolver(n.offset, n.size) + records, err := 
archive.ArchivedNodeResolver(n.offset, n.size) if err != nil { return false, nil, fmt.Errorf("failed to resolve expired node: %w", err) } @@ -700,10 +697,7 @@ func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) { return true, nn, nil case *expiredNode: - if t.archiveResolver == nil { - return false, nil, archive.ErrNoResolver - } - records, err := t.archiveResolver(n.offset, n.size) + records, err := archive.ArchivedNodeResolver(n.offset, n.size) if err != nil { return false, nil, fmt.Errorf("failed to resolve expired node: %w", err) } @@ -748,7 +742,7 @@ func copyNode(n node) node { return &expiredNode{ offset: n.offset, size: n.size, - archiveResolver: n.archiveResolver, + archiveResolver: archive.ArchivedNodeResolver, } default: panic(fmt.Sprintf("%T: unknown node type", n)) diff --git a/triedb/pathdb/reader.go b/triedb/pathdb/reader.go index e087ef26ed..845667b578 100644 --- a/triedb/pathdb/reader.go +++ b/triedb/pathdb/reader.go @@ -69,7 +69,7 @@ func (r *reader) Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, return nil, err } // Error out if the local one is inconsistent with the target. - if !r.noHashCheck && got != hash { + if !r.noHashCheck && (len(blob) > 0 && blob[0] != 0) && got != hash { // Location is always available even if the node // is not found. 
switch loc.loc { From 9606dbcb195a46c5695097ce9107d4ec3346f4e8 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet <3272758+gballet@users.noreply.github.com> Date: Mon, 2 Feb 2026 13:55:14 +0100 Subject: [PATCH 4/9] fix leaf reconstruction --- trie/expired_node.go | 116 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 97 insertions(+), 19 deletions(-) diff --git a/trie/expired_node.go b/trie/expired_node.go index 18957ccfcc..27fef5dc9c 100644 --- a/trie/expired_node.go +++ b/trie/expired_node.go @@ -67,31 +67,109 @@ func archiveRecordsToNode(records []*archive.Record) (node, error) { return nil, archive.EmptyArchiveRecord } if len(records) == 1 { - return decodeNodeUnsafe(nil, records[0].Value) + return buildLeafFromRecord(records[0]) } - var ( - newnode fullNode - curnode *fullNode - ) - for _, record := range records { - curnode = &newnode - resolved, err := decodeNodeUnsafe(nil, record.Value) + var newnode fullNode + for i, record := range records { + if err := validateRecordPath(record.Path); err != nil { + return nil, err + } + + // we are not in the case of a single leaf node, so each + // path should be at least 2 nibbles (terminator included) + if len(record.Path) < 2 || !hasTerm(record.Path) { + return nil, fmt.Errorf("invalid record path for non-leaf node #%d: %v", i, record.Path) + } + key, err := normalizeRecordKey(record.Path) if err != nil { return nil, err } - // It's not needed to resurrect all nodes, nodes - // not along the path of what has been asked can - // be updated as expired. This is for v2. 
- for i, b := range record.Path { - if curnode.Children[b] == nil { - if i < len(record.Path)-1 { - curnode.Children[b] = &fullNode{} - } else { - curnode.Children[b] = resolved - } - } + child, err := insertTrieNode(newnode.Children[key[0]], key[1:], valueNode(record.Value)) + if err != nil { + return nil, err } + newnode.Children[key[0]] = child } return &newnode, nil } + +func validateRecordPath(path []byte) error { + for i, b := range path { + if b > 16 { + return fmt.Errorf("invalid nibble in record path: %d", b) + } + if b == 16 && i != len(path)-1 { + return fmt.Errorf("terminator nibble in middle of record path") + } + } + return nil +} + +func buildLeafFromRecord(record *archive.Record) (node, error) { + key, err := normalizeRecordKey(record.Path) + if err != nil { + return nil, err + } + return &shortNode{Key: key, Val: valueNode(record.Value)}, nil +} + +// normalizeRecordKey ensures the record path is a hex-nibble key suitable for +// leaf insertion by guaranteeing a single terminator nibble and preserving any +// already-terminated path. Empty paths are normalized to a sole terminator. +func normalizeRecordKey(path []byte) ([]byte, error) { + if len(path) == 0 { + return []byte{16}, nil + } + if hasTerm(path) { + return path, nil + } + key := append([]byte{}, path...) 
+ key = append(key, 16) + return key, nil +} + +func insertTrieNode(n node, key []byte, value node) (node, error) { + if len(key) == 0 { + return value, nil + } + switch n := n.(type) { + case *shortNode: + matchlen := prefixLen(key, n.Key) + if matchlen == len(n.Key) { + nn, err := insertTrieNode(n.Val, key[matchlen:], value) + if err != nil { + return nil, err + } + return &shortNode{Key: n.Key, Val: nn}, nil + } + branch := &fullNode{} + var err error + branch.Children[n.Key[matchlen]], err = insertTrieNode(nil, n.Key[matchlen+1:], n.Val) + if err != nil { + return nil, err + } + branch.Children[key[matchlen]], err = insertTrieNode(nil, key[matchlen+1:], value) + if err != nil { + return nil, err + } + if matchlen == 0 { + return branch, nil + } + return &shortNode{Key: key[:matchlen], Val: branch}, nil + + case *fullNode: + child, err := insertTrieNode(n.Children[key[0]], key[1:], value) + if err != nil { + return nil, err + } + n.Children[key[0]] = child + return n, nil + + case nil: + return &shortNode{Key: key, Val: value}, nil + + default: + return nil, fmt.Errorf("invalid node type in trie insert: %T", n) + } +} From 8d2125e4fdeed00373a2708d6d734b45614ae5a5 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet <3272758+gballet@users.noreply.github.com> Date: Mon, 2 Feb 2026 16:05:23 +0100 Subject: [PATCH 5/9] fix linter error --- trie/archive/archive.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trie/archive/archive.go b/trie/archive/archive.go index e1129b65fc..fef59e3395 100644 --- a/trie/archive/archive.go +++ b/trie/archive/archive.go @@ -83,7 +83,7 @@ func ArchivedNodeResolver(offset, size uint64) ([]*Record, error) { var record Record err = stream.Decode(&record) if err != nil { - return nil, fmt.Errorf("error decoding rlp record from archive data: %w", err) + return nil, fmt.Errorf("error decoding rlp record from archive data (offset=%d, size=%d): %w", offset, size, err) } records = append(records, &record) } From 
5119945e25ddb794255767f1ed14cbcb00b80cd1 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet <3272758+gballet@users.noreply.github.com> Date: Fri, 13 Feb 2026 09:13:23 +0100 Subject: [PATCH 6/9] trie, cmd/geth: add archive verify command, Walk(), and archiver improvements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Trie.Walk() for exhaustive traversal that resolves expired nodes with hash verification. Add `archive verify` subcommand that walks the full state (account + storage tries) to validate all archived data can be correctly resurrected. Delete both the journal KV entry and file after archiving to force geth to restart with a bare disk layer, rewinding the chain head to the persistent disk state and re-executing blocks. Also adds markSubtreeDirty() to resolveExpiredNodeData() so that all nodes in a resolved expired subtree are captured in the NodeSet during commit — preventing them from being lost between diff layers and the raw DB. --- cmd/geth/archivecmd.go | 418 ++++++++++++++++++++++++++++++++++--- node/node.go | 2 + trie/archive/archive.go | 6 +- trie/archiver.go | 140 +++++++++---- trie/committer.go | 2 + trie/expired_node.go | 101 +++++++-- trie/expired_node_test.go | 414 ++++++++++++++++++++++-------------- trie/hasher.go | 23 ++ trie/node.go | 4 +- trie/proof.go | 13 ++ trie/trie.go | 121 +++++++---- triedb/database.go | 22 ++ triedb/pathdb/database.go | 24 +++ triedb/pathdb/history.go | 10 +- triedb/pathdb/layertree.go | 14 ++ 15 files changed, 1032 insertions(+), 282 deletions(-) diff --git a/cmd/geth/archivecmd.go b/cmd/geth/archivecmd.go index 12712b99dd..a2c6f41b5a 100644 --- a/cmd/geth/archivecmd.go +++ b/cmd/geth/archivecmd.go @@ -17,8 +17,10 @@ package main import ( + "encoding/binary" "errors" "fmt" + "os" "path/filepath" "slices" "time" @@ -26,10 +28,14 @@ import ( "github.com/ethereum/go-ethereum/cmd/utils" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" + 
"github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" "github.com/ethereum/go-ethereum/trie/archive" + "github.com/ethereum/go-ethereum/triedb/database" "github.com/urfave/cli/v2" ) @@ -51,14 +57,73 @@ var ( } // Commands + archiveCheckNodeFlag = &cli.StringFlag{ + Name: "owner", + Usage: "Owner hash (hex) for the trie node to check", + } + archiveCheckPathFlag = &cli.StringFlag{ + Name: "path", + Usage: "Path (hex nibbles) of the trie node to check", + } + archiveCommand = &cli.Command{ Name: "archive", Usage: "Archive state trie nodes to reduce database size", Subcommands: []*cli.Command{ archiveGenerateCmd, + archiveVerifyCmd, + archiveDeleteJournalCmd, + archiveCheckNodeCmd, }, } + archiveCheckNodeCmd = &cli.Command{ + Name: "check-node", + Usage: "Check if a specific trie node exists in the raw DB", + Action: archiveCheckNode, + Flags: slices.Concat([]cli.Flag{ + archiveCheckNodeFlag, + archiveCheckPathFlag, + }, utils.NetworkFlags, utils.DatabaseFlags), + } + + archiveDeleteJournalCmd = &cli.Command{ + Name: "delete-journal", + Usage: "Delete the pathdb journal to force a clean restart", + Action: archiveDeleteJournal, + Flags: slices.Concat(utils.NetworkFlags, utils.DatabaseFlags), + Description: ` +Deletes the pathdb journal (TrieJournal key and merkle.journal file) from the +database. This forces geth to restart with a bare disk layer, discarding any +in-memory diff layers that may be inconsistent with archived state. + +Use this after running 'archive generate' if geth was started in between and +recreated the journal. 
+ +Examples: + geth archive delete-journal --datadir /path/to/datadir + geth archive delete-journal --hoodi +`, + } + + archiveVerifyCmd = &cli.Command{ + Name: "verify", + Usage: "Verify all archived nodes can be correctly resurrected", + Action: archiveVerify, + Flags: slices.Concat(utils.NetworkFlags, utils.DatabaseFlags), + Description: ` +Walks the entire state trie, resolving every expired node from the archive +file and verifying that the reconstructed subtree hash matches the original. +Also walks all storage tries referenced by accounts. + +The database is opened read-only. No modifications are made. + +Examples: + geth archive verify --datadir /path/to/datadir + geth archive verify --hoodi +`, + } + archiveGenerateCmd = &cli.Command{ Name: "generate", Usage: "Generate archive files from height-3 subtrees", @@ -77,22 +142,56 @@ that references the archive file offset and size. Height is measured from leaves: leaves=0, parents=1, etc. A height-3 node has leaves at most 3 levels below it. +The archiver reads trie nodes directly from the persistent database layer, +bypassing any in-memory diff layers. This ensures consistency between the +data it reads and the data it modifies. + Examples: - # Archive from head state + # Archive from the persistent disk state geth archive generate --datadir /path/to/datadir # Dry run to see what would be archived geth archive generate --dry-run --datadir /path/to/datadir - # Archive from a specific state root - geth archive generate 0x1234...abcd --datadir /path/to/datadir - # Custom output and compaction interval geth archive generate --output /path/to/archive --compaction-interval 500 `, } ) +// rawDBNodeReader implements database.NodeReader by reading trie nodes directly +// from the raw key-value database, bypassing pathdb's in-memory diff layers. +// This ensures the archiver sees the same trie state it modifies. 
+type rawDBNodeReader struct { + db ethdb.KeyValueReader +} + +func (r *rawDBNodeReader) Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, error) { + var blob []byte + if owner == (common.Hash{}) { + blob = rawdb.ReadAccountTrieNode(r.db, path) + } else { + blob = rawdb.ReadStorageTrieNode(r.db, owner, path) + } + // Skip hash verification: the raw DB may contain expiredNode markers + // (blob[0] == 0x00) which have different hashes than the original nodes. + return blob, nil +} + +// rawDBNodeDatabase implements database.NodeDatabase using direct raw DB reads. +type rawDBNodeDatabase struct { + db ethdb.KeyValueReader + root common.Hash +} + +func (d *rawDBNodeDatabase) NodeReader(stateRoot common.Hash) (database.NodeReader, error) { + // Only allow reading the persistent disk root state + if stateRoot != d.root { + return nil, fmt.Errorf("raw DB reader only supports disk root %x, got %x", d.root, stateRoot) + } + return &rawDBNodeReader{db: d.db}, nil +} + func archiveGenerate(ctx *cli.Context) error { // 1. Setup node and databases stack, _ := makeConfigNode(ctx) @@ -109,27 +208,25 @@ func archiveGenerate(ctx *cli.Context) error { return fmt.Errorf("archive generation requires path-based state scheme, got: %s", scheme) } - triedb := utils.MakeTrieDatabase(ctx, stack, chaindb, false, false, false) - defer triedb.Close() - - // 2. Determine state root - var root common.Hash - if ctx.NArg() > 0 { - root = common.HexToHash(ctx.Args().First()) - log.Info("Using specified state root", "root", root) - } else { - headBlock := rawdb.ReadHeadBlock(chaindb) - if headBlock == nil { - return errors.New("no head block found - specify a state root or sync the chain first") - } - root = headBlock.Root() - log.Info("Using head block state", "number", headBlock.NumberU64(), "root", root) - } - - // Verify the state exists - if !rawdb.HasAccountTrieNode(chaindb, nil) { + // 2. Determine the persistent disk state root. 
+ // + // The archiver reads and writes directly to the raw key-value database, + // bypassing pathdb's in-memory diff layers. This avoids the inconsistency + // where diff layers shadow expiredNode markers written to disk. + // + // The disk root is computed by hashing the account trie root node stored + // in the raw database. This root corresponds to the last state that was + // fully persisted (i.e., PersistentStateID), which matches the canonical + // chain head. + rootBlob := rawdb.ReadAccountTrieNode(chaindb, nil) + if len(rootBlob) == 0 { return errors.New("state trie not found in database") } + root := crypto.Keccak256Hash(rootBlob) + log.Info("Using persistent disk state root", "root", root) + + // Create a raw DB node reader that bypasses pathdb layers + nodeDB := &rawDBNodeDatabase{db: chaindb, root: root} // 3. Open archive writer (unless dry-run) var writer *archive.ArchiveWriter @@ -153,7 +250,7 @@ func archiveGenerate(ctx *cli.Context) error { // 4. Create and run archiver archiver := trie.NewArchiver( chaindb, - triedb, + nodeDB, writer, ctx.Uint64(archiveCompactionIntervalFlag.Name), dryRun, @@ -174,6 +271,27 @@ func archiveGenerate(ctx *cli.Context) error { } } + if !dryRun { + // Delete the pathdb journal. The archiver modified the raw DB + // underneath the diff layers, so the journal's buffered state is + // inconsistent. Deleting forces geth to restart with a bare disk + // layer and rewind the chain head to the disk state. 
+ if err := chaindb.Delete([]byte("TrieJournal")); err != nil { + log.Warn("Failed to delete pathdb journal key", "err", err) + } + log.Info("Deleted pathdb journal to force clean restart") + + // Delete journal file(s) - check both legacy and current locations + for _, dir := range []string{"triedb", ""} { + for _, name := range []string{"merkle.journal", "verkle.journal"} { + journalFile := filepath.Join(stack.ResolvePath(dir), name) + if err := os.Remove(journalFile); err == nil { + log.Info("Deleted journal file", "path", journalFile) + } + } + } + } + // 6. Print summary var archiveSize uint64 if writer != nil { @@ -194,6 +312,258 @@ func archiveGenerate(ctx *cli.Context) error { return nil } +func archiveVerify(ctx *cli.Context) error { + stack, _ := makeConfigNode(ctx) + defer stack.Close() + + // Open database read-only + chaindb := utils.MakeChainDatabase(ctx, stack, true) + defer chaindb.Close() + + scheme := cycleCheckScheme(ctx, chaindb) + if scheme != rawdb.PathScheme { + return fmt.Errorf("archive verify requires path-based state scheme, got: %s", scheme) + } + + // Set archive data dir so ArchivedNodeResolver can find the file + // ResolvePath("") returns the node's data directory (e.g. .ethereum/hoodi/geth), + // but ArchivedNodeResolver expects the instance directory (.ethereum/hoodi) + // since it appends "geth/nodearchive" itself. 
+ archive.ArchiveDataDir = filepath.Dir(stack.ResolvePath("")) + + // Compute disk root + rootBlob := rawdb.ReadAccountTrieNode(chaindb, nil) + if len(rootBlob) == 0 { + return errors.New("state trie not found in database") + } + root := crypto.Keccak256Hash(rootBlob) + log.Info("Verifying archived nodes", "root", root) + + nodeDB := &rawDBNodeDatabase{db: chaindb, root: root} + + // Open account trie + accountTrie, err := trie.New(trie.StateTrieID(root), nodeDB) + if err != nil { + return fmt.Errorf("failed to open account trie: %w", err) + } + + var ( + totalAccounts int + totalStorageTries int + totalLeaves int + totalExpired int + totalErrors int + start = time.Now() + lastLog = time.Now() + ) + + // Walk the account trie — this resolves all expired nodes and verifies hashes + accountStats, err := accountTrie.Walk(func(path []byte, value []byte) error { + totalAccounts++ + if time.Since(lastLog) > 30*time.Second { + log.Info("Verification progress", + "accounts", totalAccounts, + "storageTries", totalStorageTries, + "leaves", totalLeaves, + "expired", totalExpired, + "errors", totalErrors) + lastLog = time.Now() + } + + // Decode account to check for storage trie + var acc types.StateAccount + if err := rlp.DecodeBytes(value, &acc); err != nil { + log.Warn("Failed to decode account", "err", err) + totalErrors++ + return nil // continue walking + } + if acc.Root == types.EmptyRootHash { + return nil + } + + // Open and walk storage trie. + // path is hex-nibble encoded (with a 16 terminator from the trie key), + // so convert nibble pairs back to the 32-byte account hash. 
+ nibbles := path + if len(nibbles) > 0 && nibbles[len(nibbles)-1] == 16 { + nibbles = nibbles[:len(nibbles)-1] + } + keyBytes := make([]byte, len(nibbles)/2) + for i := 0; i < len(nibbles); i += 2 { + keyBytes[i/2] = nibbles[i]<<4 | nibbles[i+1] + } + accountHash := common.BytesToHash(keyBytes) + storageID := trie.StorageTrieID(root, accountHash, acc.Root) + storageTrie, err := trie.New(storageID, nodeDB) + if err != nil { + log.Warn("Failed to open storage trie", "account", accountHash, "err", err) + totalErrors++ + return nil + } + + storageStats, err := storageTrie.Walk(func(spath []byte, svalue []byte) error { + return nil + }) + if err != nil { + log.Warn("Storage trie walk failed", "account", accountHash, "err", err) + totalErrors++ + return nil + } + totalStorageTries++ + totalLeaves += storageStats.Leaves + totalExpired += storageStats.ExpiredResolved + return nil + }) + if err != nil { + return fmt.Errorf("account trie walk failed: %w", err) + } + + totalLeaves += accountStats.Leaves + totalExpired += accountStats.ExpiredResolved + + log.Info("Archive verification complete", + "accounts", totalAccounts, + "storageTries", totalStorageTries, + "totalLeaves", totalLeaves, + "expiredResolved", totalExpired, + "errors", totalErrors, + "elapsed", common.PrettyDuration(time.Since(start))) + + if totalErrors > 0 { + return fmt.Errorf("verification completed with %d errors", totalErrors) + } + return nil +} + +func archiveDeleteJournal(ctx *cli.Context) error { + stack, _ := makeConfigNode(ctx) + defer stack.Close() + + chaindb := utils.MakeChainDatabase(ctx, stack, false) + defer chaindb.Close() + + // Delete the pathdb journal KV key + if err := chaindb.Delete([]byte("TrieJournal")); err != nil { + log.Warn("Failed to delete pathdb journal key", "err", err) + } else { + log.Info("Deleted pathdb journal key (TrieJournal)") + } + + // Delete the journal file(s) - check both legacy and current locations + for _, dir := range []string{"triedb", ""} { + for _, name 
:= range []string{"merkle.journal", "verkle.journal"} { + journalFile := filepath.Join(stack.ResolvePath(dir), name) + if err := os.Remove(journalFile); err == nil { + log.Info("Deleted journal file", "path", journalFile) + } else if !os.IsNotExist(err) { + log.Warn("Failed to delete journal file", "path", journalFile, "err", err) + } + } + } + + return nil +} + +func archiveCheckNode(ctx *cli.Context) error { + stack, _ := makeConfigNode(ctx) + defer stack.Close() + + chaindb := utils.MakeChainDatabase(ctx, stack, true) + defer chaindb.Close() + + ownerHex := ctx.String(archiveCheckNodeFlag.Name) + pathHex := ctx.String(archiveCheckPathFlag.Name) + + if ownerHex == "" { + return errors.New("--owner flag is required") + } + + owner := common.HexToHash(ownerHex) + + // Parse path: hex nibbles like "08" → []byte{0, 8} + var path []byte + for _, c := range pathHex { + var nibble byte + switch { + case c >= '0' && c <= '9': + nibble = byte(c - '0') + case c >= 'a' && c <= 'f': + nibble = byte(c-'a') + 10 + case c >= 'A' && c <= 'F': + nibble = byte(c-'A') + 10 + default: + return fmt.Errorf("invalid hex char in path: %c", c) + } + path = append(path, nibble) + } + + log.Info("Checking node in raw DB", "owner", owner, "path", fmt.Sprintf("%x", path)) + + // Read the node directly from the raw DB + isAccount := owner == (common.Hash{}) + + // Check the target path and all prefixes up to root + for i := len(path); i >= 0; i-- { + subpath := path[:i] + var blob []byte + if isAccount { + blob = rawdb.ReadAccountTrieNode(chaindb, subpath) + } else { + blob = rawdb.ReadStorageTrieNode(chaindb, owner, subpath) + } + + status := "MISSING" + details := "" + if len(blob) > 0 { + if blob[0] == 0x00 { + status = "EXPIRED" + if len(blob) == 17 { + offset := binary.BigEndian.Uint64(blob[1:9]) + size := binary.BigEndian.Uint64(blob[9:17]) + details = fmt.Sprintf("offset=%d size=%d", offset, size) + } + } else { + status = fmt.Sprintf("PRESENT (%d bytes, first=0x%02x)", len(blob), 
blob[0]) + } + } + label := "prefix" + if i == len(path) { + label = "TARGET" + } + if i == 0 { + label = "ROOT" + } + log.Info("Node check", + "label", label, + "path", fmt.Sprintf("%x", subpath), + "pathLen", i, + "status", status, + "details", details) + } + + // Also check a few child paths to see what's below the target + for nibble := byte(0); nibble < 16; nibble++ { + childPath := append(append([]byte{}, path...), nibble) + var blob []byte + if isAccount { + blob = rawdb.ReadAccountTrieNode(chaindb, childPath) + } else { + blob = rawdb.ReadStorageTrieNode(chaindb, owner, childPath) + } + if len(blob) > 0 { + status := fmt.Sprintf("PRESENT (%d bytes, first=0x%02x)", len(blob), blob[0]) + if blob[0] == 0x00 && len(blob) == 17 { + offset := binary.BigEndian.Uint64(blob[1:9]) + size := binary.BigEndian.Uint64(blob[9:17]) + status = fmt.Sprintf("EXPIRED offset=%d size=%d", offset, size) + } + log.Info("Child node", "path", fmt.Sprintf("%x", childPath), "status", status) + } + } + + return nil +} + // cycleCheckScheme returns the state scheme for the database. // It's a helper to check what scheme is in use. 
func cycleCheckScheme(ctx *cli.Context, db ethdb.Database) string { diff --git a/node/node.go b/node/node.go index 7c0d69775c..d9c38c4b6c 100644 --- a/node/node.go +++ b/node/node.go @@ -38,6 +38,7 @@ import ( "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/p2p" "github.com/ethereum/go-ethereum/rpc" + "github.com/ethereum/go-ethereum/trie/archive" "github.com/gofrs/flock" ) @@ -85,6 +86,7 @@ func New(conf *Config) (*Node, error) { return nil, err } conf.DataDir = absdatadir + archive.ArchiveDataDir = absdatadir } if conf.Logger == nil { conf.Logger = log.New() diff --git a/trie/archive/archive.go b/trie/archive/archive.go index fef59e3395..d4f4fb2382 100644 --- a/trie/archive/archive.go +++ b/trie/archive/archive.go @@ -22,6 +22,7 @@ import ( "fmt" "io" "os" + "path/filepath" "github.com/ethereum/go-ethereum/rlp" ) @@ -45,13 +46,16 @@ type Record struct { Value []byte } +// ArchiveDataDir is the data directory where the archive file is stored. +var ArchiveDataDir string + // ArchivedNodeResolver takes a buffer containing the archive data // held by an expiring node (an offset and a size) and returns a // list of records, which is a list of serialized leaf nodes. The // caller knows the context (MPT, binary trie) and is responsible // for decoding the nodes. 
func ArchivedNodeResolver(offset, size uint64) ([]*Record, error) { - file, err := os.Open("nodearchive") + file, err := os.Open(filepath.Join(ArchiveDataDir, "geth", "nodearchive")) if err != nil { return nil, fmt.Errorf("error opening archive file: %w", err) } diff --git a/trie/archiver.go b/trie/archiver.go index b24ac18c4a..817257c087 100644 --- a/trie/archiver.go +++ b/trie/archiver.go @@ -19,6 +19,7 @@ package trie import ( "encoding/binary" "fmt" + "time" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" @@ -39,6 +40,7 @@ type subtreeInfo struct { height int // Height of subtree (from leaves) leaves []*archive.Record // All leaf records (relative path + encoded node) nodePaths [][]byte // Paths of all nodes to delete + rootHash common.Hash // Hash of the original subtree root (for verification) } // Archiver handles the archival process of trie nodes. @@ -79,7 +81,7 @@ func NewArchiver(db ethdb.Database, triedb database.NodeDatabase, } // ProcessState archives subtrees from the given state root. -// It processes the account trie first, then all storage tries. +// It processes storage tries first, then the account trie. 
func (a *Archiver) ProcessState(root common.Hash) error { a.stateRoot = root @@ -141,7 +143,12 @@ func (a *Archiver) processTrie(owner common.Hash, t *Trie) error { subtrees := a.findHeight3Subtrees(t.root, nil, owner) log.Info("Found subtrees to archive", "owner", owner, "count", len(subtrees)) - for _, info := range subtrees { + lastLog := time.Now() + for i, info := range subtrees { + if time.Since(lastLog) > 30*time.Second { + log.Info("Archiving subtrees", "owner", owner, "progress", fmt.Sprintf("%d/%d", i, len(subtrees)), "archived", a.subtreesArchived) + lastLog = time.Now() + } if err := a.archiveSubtree(info); err != nil { log.Warn("Failed to archive subtree", "path", common.Bytes2Hex(info.path), "err", err) continue @@ -159,54 +166,86 @@ func (a *Archiver) processTrie(owner common.Hash, t *Trie) error { // findHeight3Subtrees recursively finds all subtrees with height == 3. // Height is measured from leaves: leaves=0, their parents=1, etc. func (a *Archiver) findHeight3Subtrees(n node, path []byte, owner common.Hash) []*subtreeInfo { - info := a.computeSubtreeInfo(n, path, owner) + info, err := a.computeSubtreeInfo(n, path, owner) + if err != nil { + // computeSubtreeInfo failed (e.g. unresolvable hashNode within the + // subtree). We cannot archive this node as-is, but deeper children + // may still form valid height-3 subtrees. Recurse into them. + log.Debug("computeSubtreeInfo failed, trying children", "path", common.Bytes2Hex(path), "err", err) + return a.findSubtreesInChildren(n, path, owner) + } if info == nil { return nil } // If this subtree has height 3, it's a candidate for archival if info.height == 3 { + // Capture the original subtree root hash for verification. 
+ // The hash is available from the node that was passed in: + // - hashNode: the hash IS the node + // - fullNode/shortNode: loaded from DB, flags.hash is set + switch nn := n.(type) { + case hashNode: + info.rootHash = common.BytesToHash(nn) + case *fullNode: + if nn.flags.hash != nil { + info.rootHash = common.BytesToHash(nn.flags.hash) + } + case *shortNode: + if nn.flags.hash != nil { + info.rootHash = common.BytesToHash(nn.flags.hash) + } + } return []*subtreeInfo{info} } // If height > 3, recurse into children to find height-3 subtrees if info.height > 3 { - var results []*subtreeInfo - switch n := n.(type) { - case *fullNode: - for i, child := range n.Children[:16] { - if child != nil { - childPath := append(append([]byte{}, path...), byte(i)) - results = append(results, a.findHeight3Subtrees(child, childPath, owner)...) - } - } - case *shortNode: - childPath := append(append([]byte{}, path...), n.Key...) - results = append(results, a.findHeight3Subtrees(n.Val, childPath, owner)...) - case hashNode: - // Resolve and recurse - resolved, err := a.resolveNode(n, path, owner) - if err == nil { - results = append(results, a.findHeight3Subtrees(resolved, path, owner)...) - } - } - return results + return a.findSubtreesInChildren(n, path, owner) } // Height < 3: no archivable subtrees here return nil } +// findSubtreesInChildren recurses into the children of a node to find +// height-3 subtrees. Used both by the normal height > 3 path and as a +// fallback when computeSubtreeInfo fails for a node. +func (a *Archiver) findSubtreesInChildren(n node, path []byte, owner common.Hash) []*subtreeInfo { + var results []*subtreeInfo + switch n := n.(type) { + case *fullNode: + for i, child := range n.Children[:16] { + if child != nil { + childPath := append(append([]byte{}, path...), byte(i)) + results = append(results, a.findHeight3Subtrees(child, childPath, owner)...) + } + } + case *shortNode: + childPath := append(append([]byte{}, path...), n.Key...) 
+ results = append(results, a.findHeight3Subtrees(n.Val, childPath, owner)...) + case hashNode: + // Resolve and recurse + resolved, err := a.resolveNode(n, path, owner) + if err == nil { + results = append(results, a.findHeight3Subtrees(resolved, path, owner)...) + } + } + return results +} + // computeSubtreeInfo computes height and collects leaves for a subtree. -// Returns nil if the node is nil or an error occurs during resolution. -func (a *Archiver) computeSubtreeInfo(n node, path []byte, owner common.Hash) *subtreeInfo { +// Returns (nil, nil) if the node is nil, already expired, or has no leaves. +// Returns (nil, error) if any constituent node could not be resolved — the +// caller MUST NOT archive a subtree when an error is returned, as the leaf +// set would be incomplete. +func (a *Archiver) computeSubtreeInfo(n node, path []byte, owner common.Hash) (*subtreeInfo, error) { switch n := n.(type) { case nil: - return nil + return nil, nil case valueNode: // Leaf: height 0 - // Encode the leaf as a shortNode for archive storage return &subtreeInfo{ path: copyBytes(path), owner: owner, @@ -216,13 +255,16 @@ func (a *Archiver) computeSubtreeInfo(n node, path []byte, owner common.Hash) *s Value: []byte(n), }}, nodePaths: [][]byte{copyBytes(path)}, - } + }, nil case *shortNode: childPath := append(append([]byte{}, path...), n.Key...) 
- childInfo := a.computeSubtreeInfo(n.Val, childPath, owner) + childInfo, err := a.computeSubtreeInfo(n.Val, childPath, owner) + if err != nil { + return nil, fmt.Errorf("shortNode key=%x: %w", n.Key, err) + } if childInfo == nil { - return nil + return nil, nil } // Adjust relative paths in leaves to include this node's key @@ -236,7 +278,7 @@ func (a *Archiver) computeSubtreeInfo(n node, path []byte, owner common.Hash) *s height: childInfo.height + 1, leaves: childInfo.leaves, nodePaths: append([][]byte{copyBytes(path)}, childInfo.nodePaths...), - } + }, nil case *fullNode: var ( @@ -247,7 +289,10 @@ func (a *Archiver) computeSubtreeInfo(n node, path []byte, owner common.Hash) *s for i, child := range n.Children[:16] { if child != nil { childPath := append(append([]byte{}, path...), byte(i)) - childInfo := a.computeSubtreeInfo(child, childPath, owner) + childInfo, err := a.computeSubtreeInfo(child, childPath, owner) + if err != nil { + return nil, fmt.Errorf("fullNode child[%x]: %w", i, err) + } if childInfo != nil { if childInfo.height+1 > maxHeight { maxHeight = childInfo.height + 1 @@ -263,7 +308,7 @@ func (a *Archiver) computeSubtreeInfo(n node, path []byte, owner common.Hash) *s } if len(allLeaves) == 0 { - return nil + return nil, nil } return &subtreeInfo{ @@ -272,21 +317,20 @@ func (a *Archiver) computeSubtreeInfo(n node, path []byte, owner common.Hash) *s height: maxHeight, leaves: allLeaves, nodePaths: allPaths, - } + }, nil case hashNode: resolved, err := a.resolveNode(n, path, owner) if err != nil { - log.Debug("Failed to resolve hashNode", "path", common.Bytes2Hex(path), "err", err) - return nil + return nil, fmt.Errorf("failed to resolve hashNode at path %s: %w", common.Bytes2Hex(path), err) } return a.computeSubtreeInfo(resolved, path, owner) case *expiredNode: // Already archived, skip - return nil + return nil, nil } - return nil + return nil, nil } // archiveSubtree writes leaves to archive and replaces subtree with expiredNode. 
@@ -312,7 +356,25 @@ func (a *Archiver) archiveSubtree(info *subtreeInfo) error { return fmt.Errorf("failed to sync archive: %w", err) } - // 3. Batch database operations + // 3. Verify archive round-trip: reconstruct trie from records and + // check that the hash matches the original subtree root. This + // catches any data corruption before we delete the original nodes. + if info.rootHash != (common.Hash{}) { + reconstructed, err := archiveRecordsToNode(info.leaves) + if err != nil { + return fmt.Errorf("archive verification failed: cannot reconstruct trie from records: %w", err) + } + h := newHasher(false) + gotHash := common.BytesToHash(h.hash(reconstructed, true)) + returnHasherToPool(h) + if gotHash != info.rootHash { + return fmt.Errorf("archive verification failed: hash mismatch at path %s owner %s: got %s want %s (leaves=%d offset=%d size=%d)", + common.Bytes2Hex(info.path), info.owner, gotHash, info.rootHash, + len(info.leaves), offset, size) + } + } + + // 4. Batch database operations batch := a.db.NewBatch() // Delete all nodes in subtree (except the root which we'll overwrite) diff --git a/trie/committer.go b/trie/committer.go index 2a2142e0ff..7ea4e690cf 100644 --- a/trie/committer.go +++ b/trie/committer.go @@ -79,6 +79,8 @@ func (c *committer) commit(path []byte, n node, parallel bool) node { return cn case hashNode: return cn + case *expiredNode: + return cn default: // nil, valuenode shouldn't be committed panic(fmt.Sprintf("%T: invalid node: %v", n, n)) diff --git a/trie/expired_node.go b/trie/expired_node.go index 27fef5dc9c..9a93f137c8 100644 --- a/trie/expired_node.go +++ b/trie/expired_node.go @@ -17,6 +17,7 @@ package trie import ( + "bytes" "encoding/binary" "fmt" @@ -33,11 +34,12 @@ const expiredNodeMarker = 0x00 type expiredNode struct { offset uint64 size uint64 + cachedHash hashNode archiveResolver archive.ResolverFn } func (n *expiredNode) cache() (hashNode, bool) { - return nil, true + return n.cachedHash, n.cachedHash == nil } func 
(n *expiredNode) encode(w rlp.EncoderBuffer) { @@ -62,36 +64,101 @@ func (n *expiredNode) SetArchiveResolver(resolver archive.ResolverFn) { n.archiveResolver = resolver } +// resolveExpiredNodeData resolves an expired node from the archive, verifies +// the reconstructed subtree hash, and stamps the cached hash onto the root. +// Returns an error if the archive data is corrupted (hash mismatch). +func resolveExpiredNodeData(n *expiredNode) (node, error) { + records, err := archive.ArchivedNodeResolver(n.offset, n.size) + if err != nil { + return nil, fmt.Errorf("failed to resolve expired node: %w", err) + } + resolved, err := archiveRecordsToNode(records) + if err != nil { + return nil, fmt.Errorf("failed to rebuild expired node from archive: %w", err) + } + // Verify hash integrity: if the original hash is known, check that the + // reconstructed subtree produces the same hash. A mismatch means the + // archive is corrupted (e.g. missing leaves due to unresolvable hashNodes + // during archival) and any data from it is unreliable. + if n.cachedHash != nil { + h := newHasher(false) + gotHash := h.hash(resolved, true) + returnHasherToPool(h) + if !bytes.Equal(gotHash, n.cachedHash) { + return nil, fmt.Errorf("expired node hash mismatch at offset=%d size=%d: archive data is corrupted (expected %x got %x, %d records)", + n.offset, n.size, []byte(n.cachedHash), gotHash, len(records)) + } + // Stamp the original hash onto the resolved subtree root so the + // hasher returns it directly instead of re-computing. + switch nn := resolved.(type) { + case *fullNode: + nn.flags.hash = n.cachedHash + case *shortNode: + nn.flags.hash = n.cachedHash + } + } + // Mark the entire resolved subtree as dirty. This is critical for + // correctness with pathdb's diff layer model: when a trie with expired + // nodes is modified and committed, the committer only captures dirty + // nodes into the NodeSet (which becomes the diff layer). 
Without this + // marking, resolved-but-unmodified sibling nodes within the subtree + // would exist nowhere — not in any diff layer (they're clean) and not + // in the raw DB (the archiver deleted them). Subsequent trie accesses + // from higher diff layers would fall through to the disk layer, find + // nothing, and produce MissingNodeError. + // + // For read-only tries (only get operations, no commit), this dirty + // marking is harmless — the nodes are discarded when the trie is GC'd. + markSubtreeDirty(resolved) + return resolved, nil +} + +// markSubtreeDirty recursively marks all fullNode and shortNode in the +// subtree as dirty, preserving any cached hashes. This ensures the +// committer will capture them in the NodeSet during trie commit. +func markSubtreeDirty(n node) { + switch n := n.(type) { + case *fullNode: + n.flags.dirty = true + for _, child := range n.Children[:16] { + if child != nil { + markSubtreeDirty(child) + } + } + case *shortNode: + n.flags.dirty = true + markSubtreeDirty(n.Val) + } + // valueNode, hashNode, nil: no flags to mark +} + func archiveRecordsToNode(records []*archive.Record) (node, error) { if len(records) == 0 { return nil, archive.EmptyArchiveRecord } - if len(records) == 1 { - return buildLeafFromRecord(records[0]) - } - var newnode fullNode + // Build the trie incrementally from nil to produce the canonical + // MPT structure. Starting with a fullNode would be wrong when the + // original subtree root was a shortNode (shared prefix). 
+ var root node for i, record := range records { if err := validateRecordPath(record.Path); err != nil { return nil, err } - // we are not in the case of a single leaf node, so each - // path should be at least 2 nibbles (terminator included) - if len(record.Path) < 2 || !hasTerm(record.Path) { - return nil, fmt.Errorf("invalid record path for non-leaf node #%d: %v", i, record.Path) - } key, err := normalizeRecordKey(record.Path) if err != nil { return nil, err } - child, err := insertTrieNode(newnode.Children[key[0]], key[1:], valueNode(record.Value)) + if len(key) < 1 { + return nil, fmt.Errorf("empty key in record #%d", i) + } + root, err = insertTrieNode(root, key, valueNode(record.Value)) if err != nil { return nil, err } - newnode.Children[key[0]] = child } - return &newnode, nil + return root, nil } func validateRecordPath(path []byte) error { @@ -106,14 +173,6 @@ func validateRecordPath(path []byte) error { return nil } -func buildLeafFromRecord(record *archive.Record) (node, error) { - key, err := normalizeRecordKey(record.Path) - if err != nil { - return nil, err - } - return &shortNode{Key: key, Val: valueNode(record.Value)}, nil -} - // normalizeRecordKey ensures the record path is a hex-nibble key suitable for // leaf insertion by guaranteeing a single terminator nibble and preserving any // already-terminated path. Empty paths are normalized to a sole terminator. diff --git a/trie/expired_node_test.go b/trie/expired_node_test.go index 4b4267ba37..40b3b056b4 100644 --- a/trie/expired_node_test.go +++ b/trie/expired_node_test.go @@ -19,12 +19,45 @@ package trie import ( "bytes" "errors" + "os" + "path/filepath" "testing" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie/archive" ) +// setupTestArchive creates a temporary archive directory with an archive file +// containing the given records, and configures archive.ArchiveDataDir to point +// to it. It returns the offset and size of the written data, and a cleanup function. 
+func setupTestArchive(t *testing.T, records []*archive.Record) (offset, size uint64, cleanup func()) { + t.Helper() + tmpDir := t.TempDir() + gethDir := filepath.Join(tmpDir, "geth") + if err := os.MkdirAll(gethDir, 0755); err != nil { + t.Fatal(err) + } + + writer, err := archive.NewArchiveWriter(filepath.Join(gethDir, "nodearchive")) + if err != nil { + t.Fatal(err) + } + + offset, size, err = writer.WriteSubtree(records) + if err != nil { + writer.Close() + t.Fatal(err) + } + writer.Close() + + oldDir := archive.ArchiveDataDir + archive.ArchiveDataDir = tmpDir + + return offset, size, func() { + archive.ArchiveDataDir = oldDir + } +} + func TestExpiredNodeEncodeDecode(t *testing.T) { testCases := []struct { offset uint64 @@ -120,34 +153,36 @@ func TestExpiredNodeInvalidLength(t *testing.T) { } } -func TestExpiredNodeNoResolver(t *testing.T) { +func TestExpiredNodeNoArchiveFile(t *testing.T) { + // When no archive file exists, Get should return an error + tmpDir := t.TempDir() + gethDir := filepath.Join(tmpDir, "geth") + if err := os.MkdirAll(gethDir, 0755); err != nil { + t.Fatal(err) + } + + oldDir := archive.ArchiveDataDir + archive.ArchiveDataDir = tmpDir + defer func() { archive.ArchiveDataDir = oldDir }() + tr := NewEmpty(nil) - tr.root = &expiredNode{offset: 100} + tr.root = &expiredNode{offset: 100, size: 50} _, err := tr.Get([]byte("key")) - if !errors.Is(err, archive.ErrNoResolver) { - t.Errorf("expected archive.ErrNoResolver, got %v", err) + if err == nil { + t.Error("expected error when archive file doesn't exist") } } func TestExpiredNodeWithResolver(t *testing.T) { + records := []*archive.Record{ + {Path: []byte{0x01, 0x02, 16}, Value: []byte("testvalue")}, + } + offset, size, cleanup := setupTestArchive(t, records) + defer cleanup() + tr := NewEmpty(nil) - - leafNode := &shortNode{ - Key: hexToCompact(keybytesToHex([]byte{0x12})), - Val: valueNode([]byte("testvalue")), - } - encodedLeaf := nodeToBytes(leafNode) - - resolver := func(offset, size 
uint64) ([]*archive.Record, error) { - if offset == 100 { - return []*archive.Record{{Value: encodedLeaf}}, nil - } - return nil, errors.New("unknown offset") - } - - tr.SetArchiveResolver(resolver) - tr.root = &expiredNode{offset: 100, size: uint64(len(encodedLeaf)), archiveResolver: resolver} + tr.root = &expiredNode{offset: offset, size: size} val, err := tr.Get([]byte{0x12}) if err != nil { @@ -159,14 +194,10 @@ func TestExpiredNodeWithResolver(t *testing.T) { } func TestExpiredNodeCopy(t *testing.T) { - resolver := func(offset, size uint64) ([]*archive.Record, error) { - return nil, nil - } - original := &expiredNode{ offset: 12345, size: 6789, - archiveResolver: resolver, + archiveResolver: archive.ArchivedNodeResolver, } copied := copyNode(original) @@ -201,18 +232,9 @@ func TestArchiveRecordsToNodeEmpty(t *testing.T) { } func TestArchiveRecordsToNodeMultiple(t *testing.T) { - leaf1 := &shortNode{ - Key: hexToCompact(keybytesToHex([]byte{0x10})), - Val: valueNode([]byte("value1")), - } - leaf2 := &shortNode{ - Key: hexToCompact(keybytesToHex([]byte{0x20})), - Val: valueNode([]byte("value2")), - } - records := []*archive.Record{ - {Path: []byte{0x01}, Value: nodeToBytes(leaf1)}, - {Path: []byte{0x02}, Value: nodeToBytes(leaf2)}, + {Path: []byte{0x01, 16}, Value: []byte("value1")}, + {Path: []byte{0x02, 16}, Value: []byte("value2")}, } node, err := archiveRecordsToNode(records) @@ -234,27 +256,17 @@ func TestArchiveRecordsToNodeMultiple(t *testing.T) { } func TestExpiredNodeGetMultipleRecords(t *testing.T) { - leaf1 := &shortNode{ - Key: hexToCompact([]byte{0x02, 0x03, 0x04, 16}), - Val: valueNode([]byte("value1")), - } - leaf2 := &shortNode{ - Key: hexToCompact([]byte{0x05, 0x06, 0x07, 16}), - Val: valueNode([]byte("value2")), - } - - resolver := func(offset, size uint64) ([]*archive.Record, error) { - return []*archive.Record{ - {Path: []byte{0x01}, Value: nodeToBytes(leaf1)}, - {Path: []byte{0x04}, Value: nodeToBytes(leaf2)}, - }, nil + records := 
[]*archive.Record{ + {Path: []byte{0x01, 0x02, 16}, Value: []byte("value1")}, + {Path: []byte{0x04, 0x05, 16}, Value: []byte("value2")}, } + offset, size, cleanup := setupTestArchive(t, records) + defer cleanup() tr := NewEmpty(nil) - tr.SetArchiveResolver(resolver) - tr.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver} + tr.root = &expiredNode{offset: offset, size: size} - val, err := tr.Get([]byte{0x12, 0x34}) + val, err := tr.Get([]byte{0x12}) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -263,10 +275,9 @@ func TestExpiredNodeGetMultipleRecords(t *testing.T) { } tr2 := NewEmpty(nil) - tr2.SetArchiveResolver(resolver) - tr2.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver} + tr2.root = &expiredNode{offset: offset, size: size} - val2, err := tr2.Get([]byte{0x45, 0x67}) + val2, err := tr2.Get([]byte{0x45}) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -276,20 +287,14 @@ func TestExpiredNodeGetMultipleRecords(t *testing.T) { } func TestExpiredNodeGetKeyNotFound(t *testing.T) { - leaf := &shortNode{ - Key: hexToCompact(keybytesToHex([]byte{0x12})), - Val: valueNode([]byte("value1")), - } - - resolver := func(offset, size uint64) ([]*archive.Record, error) { - return []*archive.Record{ - {Path: []byte{0x01}, Value: nodeToBytes(leaf)}, - }, nil + records := []*archive.Record{ + {Path: []byte{0x01, 0x02, 16}, Value: []byte("value1")}, } + offset, size, cleanup := setupTestArchive(t, records) + defer cleanup() tr := NewEmpty(nil) - tr.SetArchiveResolver(resolver) - tr.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver} + tr.root = &expiredNode{offset: offset, size: size} val, err := tr.Get([]byte{0xff, 0xff}) if err != nil { @@ -301,20 +306,14 @@ func TestExpiredNodeGetKeyNotFound(t *testing.T) { } func TestExpiredNodeGetPathMismatch(t *testing.T) { - leaf := &shortNode{ - Key: hexToCompact(keybytesToHex([]byte{0x12})), - Val: valueNode([]byte("testvalue")), - } - - resolver := 
func(offset, size uint64) ([]*archive.Record, error) { - return []*archive.Record{ - {Path: []byte{0x01}, Value: nodeToBytes(leaf)}, - }, nil + records := []*archive.Record{ + {Path: []byte{0x01, 0x02, 16}, Value: []byte("testvalue")}, } + offset, size, cleanup := setupTestArchive(t, records) + defer cleanup() tr := NewEmpty(nil) - tr.SetArchiveResolver(resolver) - tr.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver} + tr.root = &expiredNode{offset: offset, size: size} val, err := tr.Get([]byte{0x19}) if err != nil { @@ -326,20 +325,14 @@ func TestExpiredNodeGetPathMismatch(t *testing.T) { } func TestExpiredNodeInsert(t *testing.T) { - leaf := &shortNode{ - Key: hexToCompact(keybytesToHex([]byte{0x12})), - Val: valueNode([]byte("existing")), - } - - resolver := func(offset, size uint64) ([]*archive.Record, error) { - return []*archive.Record{ - {Path: []byte{}, Value: nodeToBytes(leaf)}, - }, nil + records := []*archive.Record{ + {Path: []byte{0x01, 0x02, 16}, Value: []byte("existing")}, } + offset, size, cleanup := setupTestArchive(t, records) + defer cleanup() tr := NewEmpty(nil) - tr.SetArchiveResolver(resolver) - tr.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver} + tr.root = &expiredNode{offset: offset, size: size} err := tr.Update([]byte{0x45}, []byte("newvalue")) if err != nil { @@ -356,20 +349,14 @@ func TestExpiredNodeInsert(t *testing.T) { } func TestExpiredNodeUpdate(t *testing.T) { - leaf := &shortNode{ - Key: hexToCompact(keybytesToHex([]byte{0x12})), - Val: valueNode([]byte("oldvalue")), - } - - resolver := func(offset, size uint64) ([]*archive.Record, error) { - return []*archive.Record{ - {Path: []byte{}, Value: nodeToBytes(leaf)}, - }, nil + records := []*archive.Record{ + {Path: []byte{0x01, 0x02, 16}, Value: []byte("oldvalue")}, } + offset, size, cleanup := setupTestArchive(t, records) + defer cleanup() tr := NewEmpty(nil) - tr.SetArchiveResolver(resolver) - tr.root = &expiredNode{offset: 100, size: 200, 
archiveResolver: resolver} + tr.root = &expiredNode{offset: offset, size: size} err := tr.Update([]byte{0x12}, []byte("newvalue")) if err != nil { @@ -386,28 +373,15 @@ func TestExpiredNodeUpdate(t *testing.T) { } func TestExpiredNodeDelete(t *testing.T) { - leaf1 := &shortNode{ - Key: hexToCompact([]byte{0x02, 16}), - Val: valueNode([]byte("value1")), - } - leaf2 := &shortNode{ - Key: hexToCompact([]byte{0x05, 16}), - Val: valueNode([]byte("value2")), - } - - branch := &fullNode{} - branch.Children[0x01] = leaf1 - branch.Children[0x04] = leaf2 - - resolver := func(offset, size uint64) ([]*archive.Record, error) { - return []*archive.Record{ - {Path: []byte{}, Value: nodeToBytes(branch)}, - }, nil + records := []*archive.Record{ + {Path: []byte{0x01, 0x02, 16}, Value: []byte("value1")}, + {Path: []byte{0x04, 0x05, 16}, Value: []byte("value2")}, } + offset, size, cleanup := setupTestArchive(t, records) + defer cleanup() tr := NewEmpty(nil) - tr.SetArchiveResolver(resolver) - tr.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver} + tr.root = &expiredNode{offset: offset, size: size} err := tr.Delete([]byte{0x12}) if err != nil { @@ -432,22 +406,14 @@ func TestExpiredNodeDelete(t *testing.T) { } func TestTrieCopyPreservesArchiveResolver(t *testing.T) { - leaf := &shortNode{ - Key: hexToCompact(keybytesToHex([]byte{0x12})), - Val: valueNode([]byte("testvalue")), - } - - resolverCalled := false - resolver := func(offset, size uint64) ([]*archive.Record, error) { - resolverCalled = true - return []*archive.Record{ - {Path: []byte{}, Value: nodeToBytes(leaf)}, - }, nil + records := []*archive.Record{ + {Path: []byte{0x01, 0x02, 16}, Value: []byte("testvalue")}, } + offset, size, cleanup := setupTestArchive(t, records) + defer cleanup() tr := NewEmpty(nil) - tr.SetArchiveResolver(resolver) - tr.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver} + tr.root = &expiredNode{offset: offset, size: size} trCopy := tr.Copy() @@ -455,36 +421,180 
@@ func TestTrieCopyPreservesArchiveResolver(t *testing.T) { if err != nil { t.Fatalf("unexpected error: %v", err) } - if !resolverCalled { - t.Error("resolver was not called on copied trie") - } if string(val) != "testvalue" { t.Errorf("value mismatch: got %q, want %q", val, "testvalue") } } -func TestExpiredNodeGetNode(t *testing.T) { - leaf := &shortNode{ - Key: hexToCompact(keybytesToHex([]byte{0x12})), - Val: valueNode([]byte("testvalue")), - } - - resolverCalled := false - resolver := func(offset, size uint64) ([]*archive.Record, error) { - resolverCalled = true - return []*archive.Record{ - {Path: []byte{}, Value: nodeToBytes(leaf)}, - }, nil +func TestWalkWithExpiredNodes(t *testing.T) { + records := []*archive.Record{ + {Path: []byte{0x01, 0x02, 16}, Value: []byte("value1")}, + {Path: []byte{0x04, 0x05, 16}, Value: []byte("value2")}, + {Path: []byte{0x07, 0x08, 16}, Value: []byte("value3")}, } + offset, size, cleanup := setupTestArchive(t, records) + defer cleanup() tr := NewEmpty(nil) - tr.SetArchiveResolver(resolver) - tr.root = &expiredNode{offset: 100, size: 200, archiveResolver: resolver} + tr.root = &expiredNode{offset: offset, size: size} + + var leaves []string + stats, err := tr.Walk(func(path []byte, value []byte) error { + leaves = append(leaves, string(value)) + return nil + }) + if err != nil { + t.Fatalf("Walk failed: %v", err) + } + if stats.Leaves != 3 { + t.Errorf("expected 3 leaves, got %d", stats.Leaves) + } + if stats.ExpiredResolved != 1 { + t.Errorf("expected 1 expired resolved, got %d", stats.ExpiredResolved) + } + // Verify all values were visited + expected := map[string]bool{"value1": true, "value2": true, "value3": true} + for _, leaf := range leaves { + if !expected[leaf] { + t.Errorf("unexpected leaf value: %q", leaf) + } + delete(expected, leaf) + } + if len(expected) > 0 { + t.Errorf("missing leaves: %v", expected) + } +} + +func TestWalkEmptyTrie(t *testing.T) { + tr := NewEmpty(nil) + stats, err := tr.Walk(func(path []byte, 
value []byte) error { + t.Error("callback should not be called for empty trie") + return nil + }) + if err != nil { + t.Fatalf("Walk failed: %v", err) + } + if stats.Leaves != 0 || stats.ExpiredResolved != 0 { + t.Errorf("expected zero stats for empty trie, got leaves=%d expired=%d", stats.Leaves, stats.ExpiredResolved) + } +} + +func TestWalkCallbackError(t *testing.T) { + records := []*archive.Record{ + {Path: []byte{0x01, 0x02, 16}, Value: []byte("value1")}, + } + offset, size, cleanup := setupTestArchive(t, records) + defer cleanup() + + tr := NewEmpty(nil) + tr.root = &expiredNode{offset: offset, size: size} + + testErr := errors.New("test error") + _, err := tr.Walk(func(path []byte, value []byte) error { + return testErr + }) + if !errors.Is(err, testErr) { + t.Fatalf("expected test error, got %v", err) + } +} + +// TestExpiredNodeResolvedSubtreeDirty verifies that when an expired node is +// resolved and a sibling leaf is modified, the commit captures ALL resolved +// nodes (not just the modified path). Without this fix, resolved-but-unmodified +// nodes would be lost: not in the diff layer (clean) and not in the raw DB +// (deleted by archiver). +func TestExpiredNodeResolvedSubtreeDirty(t *testing.T) { + // Use large values (>32 bytes) so leaf nodes are NOT embedded in + // their parent. This matches production storage tries where + // intermediate nodes are large enough to be stored independently. + bigVal1 := bytes.Repeat([]byte("A"), 40) + bigVal2 := bytes.Repeat([]byte("B"), 40) + + // Create an archive with records under different branches. + records := []*archive.Record{ + {Path: []byte{0x01, 0x02, 16}, Value: bigVal1}, + {Path: []byte{0x04, 0x05, 16}, Value: bigVal2}, + } + offset, size, cleanup := setupTestArchive(t, records) + defer cleanup() + + tr := NewEmpty(nil) + tr.root = &expiredNode{offset: offset, size: size} + + // Insert a value that goes through one branch of the resolved subtree. + // This modifies path [1, ...] 
but leaves path [4, ...] unmodified. + if err := tr.Update([]byte{0x12}, bytes.Repeat([]byte("C"), 40)); err != nil { + t.Fatalf("Update failed: %v", err) + } + + // Commit the trie. The NodeSet should be non-nil because we modified data. + _, nodes := tr.Commit(false) + if nodes == nil { + t.Fatal("expected non-nil NodeSet after modifying expired subtree") + } + + // The resolved-but-unmodified sibling (path [4, 5]) should also be + // captured in the NodeSet, because markSubtreeDirty ensures all resolved + // nodes are dirty. Count the nodes to verify. + nodeCount := len(nodes.Nodes) + // We expect at least 3 nodes: the root, the modified branch, and the + // sibling branch. The exact count depends on trie structure. + if nodeCount < 3 { + t.Errorf("expected at least 3 nodes in NodeSet (root + modified + sibling), got %d", nodeCount) + } +} + +// TestMarkSubtreeDirty verifies that markSubtreeDirty correctly sets the dirty +// flag on all nodes in a subtree while preserving cached hashes. 
+func TestMarkSubtreeDirty(t *testing.T) { + // Build a small trie structure + leaf1 := &shortNode{Key: []byte{1, 16}, Val: valueNode("v1")} + leaf2 := &shortNode{Key: []byte{2, 16}, Val: valueNode("v2")} + branch := &fullNode{} + branch.Children[1] = leaf1 + branch.Children[2] = leaf2 + + // Set hash but not dirty (as if loaded from DB) + branch.flags = nodeFlag{hash: hashNode("testhash"), dirty: false} + leaf1.flags = nodeFlag{hash: hashNode("hash1"), dirty: false} + leaf2.flags = nodeFlag{hash: hashNode("hash2"), dirty: false} + + markSubtreeDirty(branch) + + // All nodes should be dirty + if !branch.flags.dirty { + t.Error("branch should be dirty") + } + if !leaf1.flags.dirty { + t.Error("leaf1 should be dirty") + } + if !leaf2.flags.dirty { + t.Error("leaf2 should be dirty") + } + + // Hashes should be preserved + if !bytes.Equal(branch.flags.hash, hashNode("testhash")) { + t.Error("branch hash should be preserved") + } + if !bytes.Equal(leaf1.flags.hash, hashNode("hash1")) { + t.Error("leaf1 hash should be preserved") + } + if !bytes.Equal(leaf2.flags.hash, hashNode("hash2")) { + t.Error("leaf2 hash should be preserved") + } +} + +func TestExpiredNodeGetNode(t *testing.T) { + records := []*archive.Record{ + {Path: []byte{0x01, 0x02, 16}, Value: []byte("testvalue")}, + } + offset, size, cleanup := setupTestArchive(t, records) + defer cleanup() + + tr := NewEmpty(nil) + tr.root = &expiredNode{offset: offset, size: size} _, _, err := tr.GetNode(hexToCompact([]byte{0x01, 0x02})) - if !resolverCalled { - t.Error("resolver was not called during GetNode") - } if err != nil && err.Error() != "non-consensus node" { t.Fatalf("unexpected error: %v", err) } diff --git a/trie/hasher.go b/trie/hasher.go index a2a1f5b662..d4376e12e2 100644 --- a/trie/hasher.go +++ b/trie/hasher.go @@ -18,6 +18,7 @@ package trie import ( "bytes" + "encoding/binary" "fmt" "sync" @@ -97,6 +98,22 @@ func (h *hasher) hash(n node, force bool) []byte { // hash nodes don't have children, so they're 
left as were return n + case *expiredNode: + // Return the original subtree hash that was cached when the + // expired node was decoded. The parent node references this + // hash, so we must return the same value to keep the Merkle + // root consistent. + if n.cachedHash != nil { + return n.cachedHash + } + // Fallback: hash the marker blob (should not happen in practice + // because decodeNodeUnsafe always provides the hash). + var buf [1 + 2*8]byte // 17 bytes + buf[0] = expiredNodeMarker + binary.BigEndian.PutUint64(buf[1:], n.offset) + binary.BigEndian.PutUint64(buf[9:], n.size) + return h.hashData(buf[:]) + default: panic(fmt.Errorf("unexpected node type, %T", n)) } @@ -214,6 +231,12 @@ func (h *hasher) proofHash(original node) []byte { return bytes.Clone(h.encodeShortNode(n)) case *fullNode: return bytes.Clone(h.encodeFullNode(n)) + case *expiredNode: + var buf [1 + 2*8]byte + buf[0] = expiredNodeMarker + binary.BigEndian.PutUint64(buf[1:], n.offset) + binary.BigEndian.PutUint64(buf[9:], n.size) + return buf[:] default: panic(fmt.Errorf("unexpected node type, %T", original)) } diff --git a/trie/node.go b/trie/node.go index 2556ba9f81..f9e0840c1d 100644 --- a/trie/node.go +++ b/trie/node.go @@ -25,6 +25,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie/archive" ) @@ -166,7 +167,8 @@ func decodeNodeUnsafe(hash, buf []byte) (node, error) { } offset := binary.BigEndian.Uint64(buf[1:]) size := binary.BigEndian.Uint64(buf[1+archive.OffsetSize:]) - return &expiredNode{offset: offset, size: size, archiveResolver: archive.ArchivedNodeResolver}, nil + log.Debug("Decoded expired node", "offset", offset, "size", size, "hash", common.BytesToHash(hash)) + return &expiredNode{offset: offset, size: size, cachedHash: hashNode(hash), archiveResolver: archive.ArchivedNodeResolver}, nil } elems, _, err := 
rlp.SplitList(buf) if err != nil { diff --git a/trie/proof.go b/trie/proof.go index 58075daf9b..5be05c6f81 100644 --- a/trie/proof.go +++ b/trie/proof.go @@ -25,6 +25,7 @@ import ( "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/trie/archive" ) // Prove constructs a merkle proof for key. The result contains all encoded nodes @@ -78,6 +79,16 @@ func (t *Trie) Prove(key []byte, proofDb ethdb.KeyValueWriter) error { // clean cache or the database, they are all in their own // copy and safe to use unsafe decoder. tn = mustDecodeNodeUnsafe(n, blob) + case *expiredNode: + records, err := archive.ArchivedNodeResolver(n.offset, n.size) + if err != nil { + return fmt.Errorf("failed to resolve expired node in proof: %w", err) + } + resolved, err := archiveRecordsToNode(records) + if err != nil { + return fmt.Errorf("failed to rebuild expired node in proof: %w", err) + } + tn = resolved default: panic(fmt.Sprintf("%T: invalid node: %v", tn, tn)) } @@ -617,6 +628,8 @@ func get(tn node, key []byte, skipResolved bool) ([]byte, node) { } case hashNode: return key, n + case *expiredNode: + return key, n case nil: return key, nil case valueNode: diff --git a/trie/trie.go b/trie/trie.go index 5f2cdcdcfe..d8282a4e2d 100644 --- a/trie/trie.go +++ b/trie/trie.go @@ -230,32 +230,12 @@ func (t *Trie) get(origNode node, key []byte, pos int) (value []byte, newnode no value, newnode, _, err := t.get(child, key, pos) return value, newnode, true, err case *expiredNode: - records, err := archive.ArchivedNodeResolver(n.offset, n.size) - if err != nil { - return nil, n, false, fmt.Errorf("failed to resolve expired node: %w", err) - } - newnode, err := archiveRecordsToNode(records) - // alternative: don't rebuild, just find the value - // for _, record := range records { - // // make sure that the path up to the node matches - // if bytes.HasPrefix(key[pos:], record.Path) { - // resolved, err 
:= decodeNodeUnsafe(nil, record.Value) - // if err != nil { - // fmt.Printf("%v %x\n", record.Path, record.Value) - // return nil, n, false, fmt.Errorf("failed to deserialize RLP node: %w", err) - // } - // if leaf, ok := resolved.(*shortNode); ok { - // // make sure that the key to the leaf also matches - // if bytes.Equal(key[pos+len(record.Path):], leaf.Key) { - // return leaf.Val.(valueNode), newnode, true, nil - // } - // } - // } - // } + log.Debug("Resolving expired node in get()", "owner", t.owner, "offset", n.offset, "size", n.size, "pos", pos) + newnode, err := resolveExpiredNodeData(n) if err != nil { return nil, n, false, err } - value, _, _, err = t.get(newnode, key, pos+1) + value, _, _, err = t.get(newnode, key, pos) return value, newnode, true, err default: panic(fmt.Sprintf("%T: invalid node: %v", origNode, origNode)) @@ -392,12 +372,11 @@ func (t *Trie) getNode(origNode node, path []byte, pos int) (item []byte, newnod return item, newnode, resolved + 1, err case *expiredNode: - records, err := archive.ArchivedNodeResolver(n.offset, n.size) + rn, err := resolveExpiredNodeData(n) if err != nil { - return nil, n, 0, fmt.Errorf("failed to resolve expired node: %w", err) + return nil, n, 0, err } - newnode, err := archiveRecordsToNode(records) - item, newnode, resolvedCount, err := t.getNode(newnode, path, pos) + item, newnode, resolvedCount, err := t.getNode(rn, path, pos) return item, newnode, resolvedCount + 1, err default: @@ -524,16 +503,16 @@ func (t *Trie) insert(n node, prefix, key []byte, value node) (bool, node, error return true, nn, nil case *expiredNode: - records, err := archive.ArchivedNodeResolver(n.offset, n.size) + log.Info("Resolving expired node in insert()", "owner", t.owner, "offset", n.offset, "size", n.size) + rn, err := resolveExpiredNodeData(n) if err != nil { - return false, nil, fmt.Errorf("failed to resolve expired node: %w", err) + return false, nil, err } - nn, err := archiveRecordsToNode(records) - if err != nil { - 
return false, nil, fmt.Errorf("failed to rebuild expired node from archive: %w", err) + dirty, nn, err := t.insert(rn, prefix, key, value) + if !dirty || err != nil { + return false, rn, err } - dirty, nn, err := t.insert(nn, prefix, key, value) - return dirty && err == nil, nn, err + return true, nn, nil default: panic(fmt.Sprintf("%T: invalid node: %v", n, n)) @@ -697,16 +676,16 @@ func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) { return true, nn, nil case *expiredNode: - records, err := archive.ArchivedNodeResolver(n.offset, n.size) + log.Info("Resolving expired node in delete()", "owner", t.owner, "offset", n.offset, "size", n.size) + rn, err := resolveExpiredNodeData(n) if err != nil { - return false, nil, fmt.Errorf("failed to resolve expired node: %w", err) + return false, nil, err } - nn, err := archiveRecordsToNode(records) - if err != nil { - return false, nil, fmt.Errorf("failed to rebuild expired node from archive: %w", err) + dirty, nn, err := t.delete(rn, prefix, key) + if !dirty || err != nil { + return false, rn, err } - dirty, _, err := t.delete(nn, prefix, key) - return dirty && err == nil, nn, err + return true, nn, nil default: panic(fmt.Sprintf("%T: invalid node: %v (%v)", n, n, key)) @@ -742,7 +721,8 @@ func copyNode(n node) node { return &expiredNode{ offset: n.offset, size: n.size, - archiveResolver: archive.ArchivedNodeResolver, + cachedHash: common.CopyBytes(n.cachedHash), + archiveResolver: n.archiveResolver, } default: panic(fmt.Sprintf("%T: unknown node type", n)) @@ -750,8 +730,11 @@ func copyNode(n node) node { } func (t *Trie) resolve(n node, prefix []byte) (node, error) { - if n, ok := n.(hashNode); ok { + switch n := n.(type) { + case hashNode: return t.resolveAndTrack(n, prefix) + case *expiredNode: + return resolveExpiredNodeData(n) } return n, nil } @@ -862,6 +845,58 @@ func (t *Trie) Witness() map[string][]byte { return t.prevalueTracer.Values() } +// WalkStats holds statistics from a Walk traversal. 
+type WalkStats struct { + Leaves int // Number of leaf nodes visited + ExpiredResolved int // Number of expired nodes resolved from archive +} + +// Walk recursively traverses the trie, resolving all nodes including +// hashNodes and expiredNodes. It calls fn for each leaf found. +// This triggers hash verification for expired nodes via cachedHash. +func (t *Trie) Walk(fn func(path []byte, value []byte) error) (WalkStats, error) { + return t.walk(t.root, nil, fn) +} + +func (t *Trie) walk(n node, path []byte, fn func([]byte, []byte) error) (WalkStats, error) { + switch n := n.(type) { + case *shortNode: + return t.walk(n.Val, append(append([]byte{}, path...), n.Key...), fn) + case *fullNode: + var stats WalkStats + for i, child := range n.Children[:16] { + if child != nil { + childStats, err := t.walk(child, append(append([]byte{}, path...), byte(i)), fn) + if err != nil { + return stats, err + } + stats.Leaves += childStats.Leaves + stats.ExpiredResolved += childStats.ExpiredResolved + } + } + return stats, nil + case hashNode: + resolved, err := t.resolveAndTrack(n, path) + if err != nil { + return WalkStats{}, err + } + return t.walk(resolved, path, fn) + case *expiredNode: + resolved, err := resolveExpiredNodeData(n) + if err != nil { + return WalkStats{}, err + } + childStats, err := t.walk(resolved, path, fn) + childStats.ExpiredResolved++ + return childStats, err + case valueNode: + return WalkStats{Leaves: 1}, fn(path, []byte(n)) + case nil: + return WalkStats{}, nil + } + return WalkStats{}, nil +} + // reset drops the referenced root node and cleans all internal state. func (t *Trie) reset() { t.root = nil diff --git a/triedb/database.go b/triedb/database.go index ef95169df1..71b578367b 100644 --- a/triedb/database.go +++ b/triedb/database.go @@ -399,6 +399,28 @@ func (db *Database) Disk() ethdb.Database { return db.disk } +// DiffHead returns the root hash of the topmost diff layer in pathdb. 
+// If the backend is not pathdb it returns the zero hash and false; if there +// are no diff layers it returns the disk layer root and false. +func (db *Database) DiffHead() (common.Hash, bool) { + pdb, ok := db.backend.(*pathdb.Database) + if !ok { + return common.Hash{}, false + } + return pdb.DiffHead() +} + +// DisableStateHistory closes and disables the state history freezer. +// This is used by the archiver to bypass state history writes during +// diff layer flushing when state history may have gaps. +func (db *Database) DisableStateHistory() { + pdb, ok := db.backend.(*pathdb.Database) + if !ok { + return + } + pdb.DisableStateHistory() +} + // SnapshotCompleted returns the indicator if the snapshot is completed. func (db *Database) SnapshotCompleted() bool { pdb, ok := db.backend.(*pathdb.Database) diff --git a/triedb/pathdb/database.go b/triedb/pathdb/database.go index e52949c93e..ba606552df 100644 --- a/triedb/pathdb/database.go +++ b/triedb/pathdb/database.go @@ -318,6 +318,30 @@ func (db *Database) Update(root common.Hash, parentRoot common.Hash, block uint6 return db.tree.cap(root, maxDiffLayers) } +// DiffHead returns the root hash of the topmost diff layer. If there are no +// diff layers (only the disk layer), it returns the disk layer root and false. +func (db *Database) DiffHead() (common.Hash, bool) { + db.lock.RLock() + defer db.lock.RUnlock() + + return db.tree.diffHead() +} + +// DisableStateHistory closes and disables the state history freezer. This is +// used by the archiver to bypass state history writes during diff layer flushing, +// since the archiver only needs trie nodes committed to disk and state history +// may have gaps from unclean shutdowns that prevent sequential appends.
+func (db *Database) DisableStateHistory() { + db.lock.Lock() + defer db.lock.Unlock() + + if db.stateFreezer != nil { + db.stateFreezer.Close() + db.stateFreezer = nil + log.Info("Disabled state history freezer") + } +} + // Commit traverses downwards the layer tree from a specified layer with the // provided state root and all the layers below are flattened downwards. It // can be used alone and mostly for test purposes. diff --git a/triedb/pathdb/history.go b/triedb/pathdb/history.go index 55ec29e4f0..4730802d9c 100644 --- a/triedb/pathdb/history.go +++ b/triedb/pathdb/history.go @@ -278,9 +278,17 @@ func truncateFromHead(store ethdb.AncientStore, typ historyType, nhead uint64) ( return 0, err } // Ensure that the truncation target falls within the valid range. - if ohead < nhead || nhead < otail { + if nhead < otail { return 0, fmt.Errorf("%w, %s, tail: %d, head: %d, target: %d", errHeadTruncationOutOfRange, typ, otail, ohead, nhead) } + // If the target is ahead of the current head, there's nothing to truncate. + // This can happen after unclean shutdowns where the state history was not + // fully written. + if ohead < nhead { + log.Warn("State history shorter than target, nothing to truncate", + "type", typ.String(), "head", ohead, "target", nhead) + return 0, nil + } // Short circuit if nothing to truncate. if ohead == nhead { return 0, nil diff --git a/triedb/pathdb/layertree.go b/triedb/pathdb/layertree.go index b20e40bd05..99fd23a2a1 100644 --- a/triedb/pathdb/layertree.go +++ b/triedb/pathdb/layertree.go @@ -31,6 +31,7 @@ import ( // of the referenced layer by themselves. 
type layerTree struct { base *diskLayer + head common.Hash // Root hash of the topmost layer (diff or disk) layers map[common.Hash]layer // descendants is a two-dimensional map where the keys represent @@ -59,6 +60,7 @@ func (tree *layerTree) init(head layer) { defer tree.lock.Unlock() current := head + tree.head = head.rootHash() tree.layers = make(map[common.Hash]layer) tree.descendants = make(map[common.Hash]map[common.Hash]struct{}) @@ -76,6 +78,18 @@ func (tree *layerTree) init(head layer) { tree.lookup = newLookup(head, tree.isDescendant) } +// diffHead returns the root hash of the topmost diff layer. If there are no +// diff layers, returns the disk layer root and false. +func (tree *layerTree) diffHead() (common.Hash, bool) { + tree.lock.RLock() + defer tree.lock.RUnlock() + + if _, ok := tree.layers[tree.head].(*diffLayer); ok { + return tree.head, true + } + return tree.base.rootHash(), false +} + // get retrieves a layer belonging to the given state root. func (tree *layerTree) get(root common.Hash) layer { tree.lock.RLock() From 2ee9408be5529193fd65785385a6f5ccf95dd120 Mon Sep 17 00:00:00 2001 From: Emualliug Tellab <249254436+tellabg@users.noreply.github.com> Date: Wed, 11 Mar 2026 22:32:46 +0100 Subject: [PATCH 7/9] trie/archiver: streaming archival + pathdb journal consistency (#567) * trie/archiver: streaming subtree archival to fix OOM Replace the recursive approach that loaded the entire trie into memory with a streaming NodeIterator-based approach: - processTrie now uses NodeIterator to walk the trie node-by-node - probeHeight reads nodes from raw DB, computes height bounded at 3, and discards decoded nodes immediately (no in-memory trie buildup) - collectSubtree only materializes the bounded height-3 subtree being archived (at most ~4096 nodes) - Memory usage: O(iterator_stack) + O(current_subtree) instead of O(entire_trie) This fixes OOM kills on large storage tries (e.g. 
contracts with millions of storage slots) where the previous approach would load all nodes and subtreeInfo into memory before archiving any of them. * cmd/geth: flush diff layers before archiving, re-journal after Instead of deleting the pathdb journal after archive generation (which breaks the chain head and prevents block imports), properly integrate with pathdb: 1. Open triedb with pathdb support (not just raw KV) 2. Disable state history freezer (avoid append gaps) 3. Flush all diff layers to disk via Commit() before archiving 4. After archiving, re-journal the pathdb state (disk layer only) This ensures geth can restart cleanly after archiving and continue importing blocks without 'unknown ancestor' errors. * trie: add resurrection timing and depth metrics to expired node resolution * Update trie/archiver.go --------- Co-authored-by: Guillaume Ballet <3272758+gballet@users.noreply.github.com> --- cmd/geth/archivecmd.go | 71 +++---- trie/archiver.go | 434 +++++++++++++++++++++++++++-------------- trie/expired_node.go | 28 +++ trie/trie.go | 4 +- 4 files changed, 353 insertions(+), 184 deletions(-) diff --git a/cmd/geth/archivecmd.go b/cmd/geth/archivecmd.go index a2c6f41b5a..d6e241974e 100644 --- a/cmd/geth/archivecmd.go +++ b/cmd/geth/archivecmd.go @@ -197,7 +197,6 @@ func archiveGenerate(ctx *cli.Context) error { stack, _ := makeConfigNode(ctx) defer stack.Close() - // Open database in write mode (readOnly=false) unless dry-run dryRun := ctx.Bool(archiveDryRunFlag.Name) chaindb := utils.MakeChainDatabase(ctx, stack, dryRun) defer chaindb.Close() @@ -208,27 +207,39 @@ func archiveGenerate(ctx *cli.Context) error { return fmt.Errorf("archive generation requires path-based state scheme, got: %s", scheme) } - // 2. Determine the persistent disk state root. - // - // The archiver reads and writes directly to the raw key-value database, - // bypassing pathdb's in-memory diff layers. 
This avoids the inconsistency - // where diff layers shadow expiredNode markers written to disk. - // - // The disk root is computed by hashing the account trie root node stored - // in the raw database. This root corresponds to the last state that was - // fully persisted (i.e., PersistentStateID), which matches the canonical - // chain head. + // 2. Flush diff layers to disk via pathdb. This ensures the raw DB + // contains the complete, up-to-date state trie and that state history + // entries are properly written to the freezer. + trieDB := utils.MakeTrieDatabase(ctx, stack, chaindb, false, dryRun, false) + head, hasDiff := trieDB.DiffHead() + if hasDiff { + log.Info("Flushing diff layers to disk", "head", head) + if err := trieDB.Commit(head, true); err != nil { + trieDB.Close() + return fmt.Errorf("failed to flush diff layers: %w", err) + } + log.Info("Diff layers flushed successfully") + } else { + log.Info("No diff layers to flush, disk state is current", "root", head) + } + // Close triedb — we work directly with raw DB for archival. + // We'll re-open it at the end to write a fresh journal. + trieDB.Close() + + // 3. Determine the disk state root (now up-to-date after flush). rootBlob := rawdb.ReadAccountTrieNode(chaindb, nil) if len(rootBlob) == 0 { return errors.New("state trie not found in database") } root := crypto.Keccak256Hash(rootBlob) - log.Info("Using persistent disk state root", "root", root) + log.Info("Using disk state root", "root", root) // Create a raw DB node reader that bypasses pathdb layers nodeDB := &rawDBNodeDatabase{db: chaindb, root: root} - // 3. Open archive writer (unless dry-run) + // 4. Open archive writer (unless dry-run). + // The archive file is placed at /geth/nodearchive by default, + // matching the path used by ArchivedNodeResolver when reading back. 
var writer *archive.ArchiveWriter archivePath := ctx.String(archiveOutputFlag.Name) if archivePath == "" { @@ -247,7 +258,7 @@ func archiveGenerate(ctx *cli.Context) error { log.Info("Dry run mode - no changes will be made") } - // 4. Create and run archiver + // 5. Create and run archiver archiver := trie.NewArchiver( chaindb, nodeDB, @@ -261,7 +272,7 @@ func archiveGenerate(ctx *cli.Context) error { return fmt.Errorf("archive generation failed: %w", err) } - // 5. Get stats and optionally run final compaction + // 6. Get stats and optionally run final compaction subtrees, leaves, bytesDeleted := archiver.Stats() if !dryRun && subtrees > 0 { @@ -271,28 +282,22 @@ func archiveGenerate(ctx *cli.Context) error { } } + // 7. Re-journal the pathdb state with the current disk root. + // After archiving, some trie nodes have been replaced with expired + // markers. We re-open pathdb and write a fresh journal (disk layer + // only, since all diff layers were flushed in step 2) so that geth + // can restart cleanly. if !dryRun { - // Delete the pathdb journal. The archiver modified the raw DB - // underneath the diff layers, so the journal's buffered state is - // inconsistent. Deleting forces geth to restart with a bare disk - // layer and rewind the chain head to the disk state. 
- if err := chaindb.Delete([]byte("TrieJournal")); err != nil { - log.Warn("Failed to delete pathdb journal key", "err", err) - } - log.Info("Deleted pathdb journal to force clean restart") - - // Delete journal file(s) - check both legacy and current locations - for _, dir := range []string{"triedb", ""} { - for _, name := range []string{"merkle.journal", "verkle.journal"} { - journalFile := filepath.Join(stack.ResolvePath(dir), name) - if err := os.Remove(journalFile); err == nil { - log.Info("Deleted journal file", "path", journalFile) - } - } + log.Info("Re-journaling pathdb state") + freshTrieDB := utils.MakeTrieDatabase(ctx, stack, chaindb, false, false, false) + freshRoot := crypto.Keccak256Hash(rawdb.ReadAccountTrieNode(chaindb, nil)) + if err := freshTrieDB.Journal(freshRoot); err != nil { + log.Warn("Failed to re-journal pathdb state", "err", err) } + freshTrieDB.Close() } - // 6. Print summary + // 8. Print summary var archiveSize uint64 if writer != nil { archiveSize = writer.Offset() diff --git a/trie/archiver.go b/trie/archiver.go index 817257c087..32b2eae711 100644 --- a/trie/archiver.go +++ b/trie/archiver.go @@ -46,6 +46,12 @@ type subtreeInfo struct { // Archiver handles the archival process of trie nodes. // It walks the state trie, identifies subtrees at height 3, // archives their leaf data, and replaces them with expiredNode markers. +// +// The archiver uses a streaming approach: it walks the trie using a +// NodeIterator, probes each node's height via bounded raw DB reads, +// and archives subtrees immediately when found. This keeps memory +// usage proportional to the iterator stack depth + the current subtree +// being processed, rather than loading the entire trie into memory. type Archiver struct { db ethdb.Database triedb database.NodeDatabase @@ -134,23 +140,69 @@ func (a *Archiver) ProcessState(root common.Hash) error { return nil } -// processTrie finds and archives all height-3 subtrees in the trie. 
+// processTrie finds and archives all height-3 subtrees in the trie using +// a streaming approach. It walks the trie with a NodeIterator, probes each +// node's height via bounded raw DB reads, and archives subtrees immediately. +// +// Memory usage is O(iterator_stack_depth + current_subtree_size) instead of +// O(entire_trie) as with the previous recursive approach. func (a *Archiver) processTrie(owner common.Hash, t *Trie) error { if t.root == nil { return nil } - subtrees := a.findHeight3Subtrees(t.root, nil, owner) - log.Info("Found subtrees to archive", "owner", owner, "count", len(subtrees)) + iter, err := t.NodeIterator(nil) + if err != nil { + return fmt.Errorf("failed to create node iterator: %w", err) + } - lastLog := time.Now() - for i, info := range subtrees { + var ( + lastLog = time.Now() + found uint64 + ) + + for iter.Next(true) { + if iter.Leaf() { + continue + } + + // Progress logging if time.Since(lastLog) > 30*time.Second { - log.Info("Archiving subtrees", "owner", owner, "progress", fmt.Sprintf("%d/%d", i, len(subtrees)), "archived", a.subtreesArchived) + log.Info("Scanning trie for subtrees", + "owner", owner, + "path", common.Bytes2Hex(iter.Path()), + "found", found, + "archived", a.subtreesArchived) lastLog = time.Now() } + + path := copyBytes(iter.Path()) + hash := iter.Hash() + if hash == (common.Hash{}) { + // Embedded node (no hash), skip — it will be part of a + // parent subtree. + continue + } + + // Probe subtree height via bounded raw DB reads. + // This does NOT load the trie into memory — it reads blobs from + // the DB, decodes them, computes height, and discards them. + height := a.probeHeight(owner, path, hash, 3) + if height != 3 { + // Too small to archive; the iterator will visit children. + // Too tall — descend into children to find height-3 subtrees. + continue + } + + // height == 3: collect and archive this subtree immediately. 
+ info := a.collectSubtree(owner, path, hash) + if info == nil { + continue + } + found++ + if err := a.archiveSubtree(info); err != nil { - log.Warn("Failed to archive subtree", "path", common.Bytes2Hex(info.path), "err", err) + log.Warn("Failed to archive subtree", "path", common.Bytes2Hex(path), "err", err) continue } a.subtreesArchived++ @@ -159,178 +211,275 @@ func (a *Archiver) processTrie(owner common.Hash, t *Trie) error { if err := a.maybeCompact(); err != nil { log.Warn("Compaction failed", "err", err) } + + // Skip children — they're now archived. + // We call Next(false) to move past the subtree without descending. + iter.Next(false) } + + if iter.Error() != nil { + return fmt.Errorf("iterator error: %w", iter.Error()) + } + + log.Info("Found subtrees to archive", "owner", owner, "count", found) return nil } -// findHeight3Subtrees recursively finds all subtrees with height == 3. +// probeHeight computes the height of a node by reading from the raw DB. +// It stops early once height exceeds maxHeight (returns maxHeight+1). +// The decoded nodes are not retained — they are discarded after inspection. +// // Height is measured from leaves: leaves=0, their parents=1, etc. -func (a *Archiver) findHeight3Subtrees(n node, path []byte, owner common.Hash) []*subtreeInfo { - info, err := a.computeSubtreeInfo(n, path, owner) - if err != nil { - // computeSubtreeInfo failed (e.g. unresolvable hashNode within the - // subtree). We cannot archive this node as-is, but deeper children - // may still form valid height-3 subtrees. Recurse into them. - log.Debug("computeSubtreeInfo failed, trying children", "path", common.Bytes2Hex(path), "err", err) - return a.findSubtreesInChildren(n, path, owner) +func (a *Archiver) probeHeight(owner common.Hash, path []byte, hash common.Hash, maxHeight int) int { + blob := a.readNodeBlob(owner, path) + if len(blob) == 0 { + return 0 } - if info == nil { + + // Already expired — skip. 
+ if blob[0] == expiredNodeMarker { + return -1 + } + + n, err := decodeNodeUnsafe(hash[:], blob) + if err != nil { + return 0 + } + + return a.nodeHeight(n, path, owner, maxHeight) +} + +// nodeHeight computes the height of a decoded node, bounded by maxHeight. +// Returns maxHeight+1 early if the subtree is taller than maxHeight. +func (a *Archiver) nodeHeight(n node, path []byte, owner common.Hash, maxHeight int) int { + switch n := n.(type) { + case nil: + return 0 + + case valueNode: + return 0 + + case *shortNode: + childPath := append(append([]byte{}, path...), n.Key...) + switch child := n.Val.(type) { + case valueNode: + return 1 // shortNode → leaf + case hashNode: + if maxHeight <= 1 { + return maxHeight + 1 + } + childHeight := a.probeHeight(owner, childPath, common.BytesToHash(child), maxHeight-1) + if childHeight < 0 { + return -1 // expired child + } + return childHeight + 1 + default: + // Inline node + childHeight := a.nodeHeight(child, childPath, owner, maxHeight-1) + if childHeight < 0 { + return -1 + } + return childHeight + 1 + } + + case *fullNode: + maxH := 0 + for i, child := range n.Children[:16] { + if child == nil { + continue + } + childPath := append(append([]byte{}, path...), byte(i)) + var childHeight int + switch c := child.(type) { + case valueNode: + childHeight = 0 + case hashNode: + if maxH+1 > maxHeight { + return maxHeight + 1 + } + childHeight = a.probeHeight(owner, childPath, common.BytesToHash(c), maxHeight-1) + default: + childHeight = a.nodeHeight(c, childPath, owner, maxHeight-1) + } + if childHeight < 0 { + continue // expired child, skip + } + h := childHeight + 1 + if h > maxH { + maxH = h + } + if maxH > maxHeight { + return maxHeight + 1 + } + } + return maxH + + case hashNode: + return a.probeHeight(owner, path, common.BytesToHash(n), maxHeight) + + case *expiredNode: + return -1 + } + return 0 +} + +// collectSubtree reads a height-3 subtree from the raw DB and collects its +// leaves and node paths for archival. 
The subtree is bounded (height ≤ 3), +// so memory usage is limited. +func (a *Archiver) collectSubtree(owner common.Hash, path []byte, hash common.Hash) *subtreeInfo { + blob := a.readNodeBlob(owner, path) + if len(blob) == 0 { + return nil + } + if blob[0] == expiredNodeMarker { return nil } - // If this subtree has height 3, it's a candidate for archival - if info.height == 3 { - // Capture the original subtree root hash for verification. - // The hash is available from the node that was passed in: - // - hashNode: the hash IS the node - // - fullNode/shortNode: loaded from DB, flags.hash is set - switch nn := n.(type) { - case hashNode: - info.rootHash = common.BytesToHash(nn) - case *fullNode: - if nn.flags.hash != nil { - info.rootHash = common.BytesToHash(nn.flags.hash) - } - case *shortNode: - if nn.flags.hash != nil { - info.rootHash = common.BytesToHash(nn.flags.hash) - } - } - return []*subtreeInfo{info} + n, err := decodeNodeUnsafe(hash[:], blob) + if err != nil { + log.Warn("Failed to decode node for collection", "path", common.Bytes2Hex(path), "err", err) + return nil } - // If height > 3, recurse into children to find height-3 subtrees - if info.height > 3 { - return a.findSubtreesInChildren(n, path, owner) + info := &subtreeInfo{ + path: copyBytes(path), + owner: owner, + rootHash: hash, } - // Height < 3: no archivable subtrees here - return nil + leaves, nodePaths, height, err := a.collectNodeLeaves(n, path, nil, owner) + if err != nil { + log.Warn("Failed to collect subtree leaves", "path", common.Bytes2Hex(path), "err", err) + return nil + } + + info.height = height + info.leaves = leaves + info.nodePaths = append([][]byte{copyBytes(path)}, nodePaths...) + return info } -// findSubtreesInChildren recurses into the children of a node to find -// height-3 subtrees. Used both by the normal height > 3 path and as a -// fallback when computeSubtreeInfo fails for a node. 
-func (a *Archiver) findSubtreesInChildren(n node, path []byte, owner common.Hash) []*subtreeInfo { - var results []*subtreeInfo - switch n := n.(type) { - case *fullNode: - for i, child := range n.Children[:16] { - if child != nil { - childPath := append(append([]byte{}, path...), byte(i)) - results = append(results, a.findHeight3Subtrees(child, childPath, owner)...) - } - } - case *shortNode: - childPath := append(append([]byte{}, path...), n.Key...) - results = append(results, a.findHeight3Subtrees(n.Val, childPath, owner)...) - case hashNode: - // Resolve and recurse - resolved, err := a.resolveNode(n, path, owner) - if err == nil { - results = append(results, a.findHeight3Subtrees(resolved, path, owner)...) - } - } - return results -} - -// computeSubtreeInfo computes height and collects leaves for a subtree. -// Returns (nil, nil) if the node is nil, already expired, or has no leaves. -// Returns (nil, error) if any constituent node could not be resolved — the -// caller MUST NOT archive a subtree when an error is returned, as the leaf -// set would be incomplete. -func (a *Archiver) computeSubtreeInfo(n node, path []byte, owner common.Hash) (*subtreeInfo, error) { +// collectNodeLeaves recursively collects all leaves and node paths in a +// bounded subtree. relPath is the path relative to the subtree root. +// Returns (leaves, nodePaths, height, error). 
+func (a *Archiver) collectNodeLeaves(n node, absPath, relPath []byte, owner common.Hash) ([]*archive.Record, [][]byte, int, error) { switch n := n.(type) { case nil: - return nil, nil + return nil, nil, 0, nil case valueNode: - // Leaf: height 0 - return &subtreeInfo{ - path: copyBytes(path), - owner: owner, - height: 0, - leaves: []*archive.Record{{ - Path: nil, // Empty relative path for leaf at root - Value: []byte(n), - }}, - nodePaths: [][]byte{copyBytes(path)}, - }, nil + return []*archive.Record{{ + Path: copyBytes(relPath), + Value: []byte(n), + }}, nil, 0, nil case *shortNode: - childPath := append(append([]byte{}, path...), n.Key...) - childInfo, err := a.computeSubtreeInfo(n.Val, childPath, owner) + childAbsPath := append(append([]byte{}, absPath...), n.Key...) + var childNode node + switch c := n.Val.(type) { + case hashNode: + resolved, err := a.resolveRawNode(owner, childAbsPath, common.BytesToHash(c)) + if err != nil { + return nil, nil, 0, fmt.Errorf("resolve shortNode child at %s: %w", common.Bytes2Hex(childAbsPath), err) + } + childNode = resolved + default: + childNode = c + } + + // Pass nil relPath to child — we prepend the key ourselves + leaves, nodePaths, height, err := a.collectNodeLeaves(childNode, childAbsPath, nil, owner) if err != nil { - return nil, fmt.Errorf("shortNode key=%x: %w", n.Key, err) - } - if childInfo == nil { - return nil, nil + return nil, nil, 0, err } - // Adjust relative paths in leaves to include this node's key - for _, leaf := range childInfo.leaves { - leaf.Path = append(append([]byte{}, n.Key...), leaf.Path...) + // Prepend [relPath + extension key] to leaf relative paths + prefix := append(append([]byte{}, relPath...), n.Key...) + for _, leaf := range leaves { + leaf.Path = append(append([]byte{}, prefix...), leaf.Path...) 
} - return &subtreeInfo{ - path: copyBytes(path), - owner: owner, - height: childInfo.height + 1, - leaves: childInfo.leaves, - nodePaths: append([][]byte{copyBytes(path)}, childInfo.nodePaths...), - }, nil + return leaves, append([][]byte{copyBytes(absPath)}, nodePaths...), height + 1, nil case *fullNode: var ( - maxHeight = 0 allLeaves []*archive.Record - allPaths = [][]byte{copyBytes(path)} + allPaths [][]byte + maxHeight int ) for i, child := range n.Children[:16] { - if child != nil { - childPath := append(append([]byte{}, path...), byte(i)) - childInfo, err := a.computeSubtreeInfo(child, childPath, owner) + if child == nil { + continue + } + childAbsPath := append(append([]byte{}, absPath...), byte(i)) + + var childNode node + switch c := child.(type) { + case hashNode: + resolved, err := a.resolveRawNode(owner, childAbsPath, common.BytesToHash(c)) if err != nil { - return nil, fmt.Errorf("fullNode child[%x]: %w", i, err) - } - if childInfo != nil { - if childInfo.height+1 > maxHeight { - maxHeight = childInfo.height + 1 - } - // Adjust relative paths to include the branch index - for _, leaf := range childInfo.leaves { - leaf.Path = append([]byte{byte(i)}, leaf.Path...) - } - allLeaves = append(allLeaves, childInfo.leaves...) - allPaths = append(allPaths, childInfo.nodePaths...) + return nil, nil, 0, fmt.Errorf("resolve fullNode child[%x] at %s: %w", i, common.Bytes2Hex(childAbsPath), err) } + childNode = resolved + default: + childNode = c + } + + // Pass nil relPath to child — we prepend the index ourselves + leaves, nodePaths, height, err := a.collectNodeLeaves(childNode, childAbsPath, nil, owner) + if err != nil { + return nil, nil, 0, err + } + + // Prepend [relPath + branch index] to leaf relative paths + prefix := append(append([]byte{}, relPath...), byte(i)) + for _, leaf := range leaves { + leaf.Path = append(append([]byte{}, prefix...), leaf.Path...) + } + + allLeaves = append(allLeaves, leaves...) + allPaths = append(allPaths, nodePaths...) 
+ h := height + 1 + if h > maxHeight { + maxHeight = h } } - - if len(allLeaves) == 0 { - return nil, nil - } - - return &subtreeInfo{ - path: copyBytes(path), - owner: owner, - height: maxHeight, - leaves: allLeaves, - nodePaths: allPaths, - }, nil + return allLeaves, allPaths, maxHeight, nil case hashNode: - resolved, err := a.resolveNode(n, path, owner) + resolved, err := a.resolveRawNode(owner, absPath, common.BytesToHash(n)) if err != nil { - return nil, fmt.Errorf("failed to resolve hashNode at path %s: %w", common.Bytes2Hex(path), err) + return nil, nil, 0, err } - return a.computeSubtreeInfo(resolved, path, owner) + return a.collectNodeLeaves(resolved, absPath, relPath, owner) case *expiredNode: - // Already archived, skip - return nil, nil + return nil, nil, 0, nil } - return nil, nil + return nil, nil, 0, nil +} + +// readNodeBlob reads a trie node blob directly from the raw key-value +// database, bypassing pathdb layers. +func (a *Archiver) readNodeBlob(owner common.Hash, path []byte) []byte { + if owner == (common.Hash{}) { + return rawdb.ReadAccountTrieNode(a.db, path) + } + return rawdb.ReadStorageTrieNode(a.db, owner, path) +} + +// resolveRawNode reads and decodes a trie node directly from the raw DB. +// Unlike resolveNode, this does NOT use the trie database (no caching, +// no diff layers). The decoded node is ephemeral and will be GC'd after use. +func (a *Archiver) resolveRawNode(owner common.Hash, path []byte, hash common.Hash) (node, error) { + blob := a.readNodeBlob(owner, path) + if len(blob) == 0 { + return nil, fmt.Errorf("node not found: owner=%s path=%s", owner, common.Bytes2Hex(path)) + } + if blob[0] == expiredNodeMarker { + return &expiredNode{}, nil + } + return decodeNodeUnsafe(hash[:], blob) } // archiveSubtree writes leaves to archive and replaces subtree with expiredNode. @@ -424,19 +573,6 @@ func (a *Archiver) maybeCompact() error { return nil } -// resolveNode resolves a hashNode to its actual node content. 
-func (a *Archiver) resolveNode(hash hashNode, path []byte, owner common.Hash) (node, error) { - reader, err := a.triedb.NodeReader(a.stateRoot) - if err != nil { - return nil, err - } - blob, err := reader.Node(owner, path, common.BytesToHash(hash)) - if err != nil { - return nil, err - } - return decodeNodeUnsafe(hash, blob) -} - // encodeExpiredNodeBlob creates the raw bytes for an expiredNode. // Format: 1-byte marker (0x00) + 8-byte offset + 8-byte size = 17 bytes func encodeExpiredNodeBlob(offset, size uint64) []byte { diff --git a/trie/expired_node.go b/trie/expired_node.go index 9a93f137c8..ce622daa03 100644 --- a/trie/expired_node.go +++ b/trie/expired_node.go @@ -20,7 +20,9 @@ import ( "bytes" "encoding/binary" "fmt" + "time" + "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie/archive" ) @@ -68,6 +70,7 @@ func (n *expiredNode) SetArchiveResolver(resolver archive.ResolverFn) { // the reconstructed subtree hash, and stamps the cached hash onto the root. // Returns an error if the archive data is corrupted (hash mismatch). func resolveExpiredNodeData(n *expiredNode) (node, error) { + start := time.Now() records, err := archive.ArchivedNodeResolver(n.offset, n.size) if err != nil { return nil, fmt.Errorf("failed to resolve expired node: %w", err) @@ -76,6 +79,11 @@ func resolveExpiredNodeData(n *expiredNode) (node, error) { if err != nil { return nil, fmt.Errorf("failed to rebuild expired node from archive: %w", err) } + depth := subtreeDepth(resolved) + log.Debug("Resurrected expired node from archive", + "offset", n.offset, "archiveBytes", n.size, + "records", len(records), "depth", depth, + "elapsed", time.Since(start)) // Verify hash integrity: if the original hash is known, check that the // reconstructed subtree produces the same hash. A mismatch means the // archive is corrupted (e.g. 
missing leaves due to unresolvable hashNodes @@ -113,6 +121,26 @@ func resolveExpiredNodeData(n *expiredNode) (node, error) { return resolved, nil } +// subtreeDepth returns the maximum depth of a trie subtree. +func subtreeDepth(n node) int { + switch n := n.(type) { + case *fullNode: + max := 0 + for _, child := range &n.Children { + if child != nil { + if d := subtreeDepth(child); d > max { + max = d + } + } + } + return 1 + max + case *shortNode: + return 1 + subtreeDepth(n.Val) + default: + return 0 + } +} + // markSubtreeDirty recursively marks all fullNode and shortNode in the // subtree as dirty, preserving any cached hashes. This ensures the // committer will capture them in the NodeSet during trie commit. diff --git a/trie/trie.go b/trie/trie.go index d8282a4e2d..69db68b515 100644 --- a/trie/trie.go +++ b/trie/trie.go @@ -503,7 +503,7 @@ func (t *Trie) insert(n node, prefix, key []byte, value node) (bool, node, error return true, nn, nil case *expiredNode: - log.Info("Resolving expired node in insert()", "owner", t.owner, "offset", n.offset, "size", n.size) + log.Debug("Resolving expired node in insert()", "owner", t.owner, "offset", n.offset, "size", n.size) rn, err := resolveExpiredNodeData(n) if err != nil { return false, nil, err @@ -676,7 +676,7 @@ func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) { return true, nn, nil case *expiredNode: - log.Info("Resolving expired node in delete()", "owner", t.owner, "offset", n.offset, "size", n.size) + log.Debug("Resolving expired node in delete()", "owner", t.owner, "offset", n.offset, "size", n.size) rn, err := resolveExpiredNodeData(n) if err != nil { return false, nil, err From 34baa98ea9cfb20cdf481296da4b11321b7900eb Mon Sep 17 00:00:00 2001 From: Guillaume Ballet <3272758+gballet@users.noreply.github.com> Date: Thu, 12 Mar 2026 14:15:07 +0100 Subject: [PATCH 8/9] triedb/pathdb: fix TestTruncateOutOfRange after graceful head truncation change (#573) The streaming archival PR changed 
truncateFromHead to return nil (instead of an error) when nhead > ohead, gracefully handling unclean shutdowns where state history was not fully written. Update the test to expect nil for the head+1 case. Co-authored-by: tellabg <249254436+tellabg@users.noreply.github.com> --- triedb/pathdb/history_state_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/triedb/pathdb/history_state_test.go b/triedb/pathdb/history_state_test.go index 5c3026a571..3e75d3baa8 100644 --- a/triedb/pathdb/history_state_test.go +++ b/triedb/pathdb/history_state_test.go @@ -244,8 +244,8 @@ func TestTruncateOutOfRange(t *testing.T) { target uint64 expErr error }{ - {0, head, nil}, // nothing to delete - {0, head + 1, errHeadTruncationOutOfRange}, + {0, head, nil}, // nothing to delete + {0, head + 1, nil}, // gracefully handled after unclean shutdown {0, tail - 1, errHeadTruncationOutOfRange}, {1, tail, nil}, // nothing to delete {1, head + 1, errTailTruncationOutOfRange}, From a764b364496a729937c2492e06c2b895e8fefad0 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet <3272758+gballet@users.noreply.github.com> Date: Thu, 11 Jun 2026 13:45:23 +0200 Subject: [PATCH 9/9] drop bintrie support for now --- trie/bintrie/expired_node.go | 176 ------------------- trie/bintrie/expired_node_test.go | 277 ------------------------------ 2 files changed, 453 deletions(-) delete mode 100644 trie/bintrie/expired_node.go delete mode 100644 trie/bintrie/expired_node_test.go diff --git a/trie/bintrie/expired_node.go b/trie/bintrie/expired_node.go deleted file mode 100644 index d3b90ee9ea..0000000000 --- a/trie/bintrie/expired_node.go +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright 2026 go-ethereum Authors -// This file is part of the go-ethereum library. 
-// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. - -package bintrie - -import ( - "fmt" - - "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/trie/archive" -) - -// expiredNode represents a node whose data has been archived. -// It stores the file offset and size of the archived subtree data. -type expiredNode struct { - Offset uint64 - Size uint64 - depth int - archiveResolver archive.ResolverFn -} - -func archiveRecordsToNode(records []*archive.Record, depth int) (BinaryNode, error) { - if len(records) == 0 { - return nil, archive.EmptyArchiveRecord - } - if len(records) == 1 { - return DeserializeNode(records[0].Value, depth) - } - - var ( - newnode InternalNode - curnode *InternalNode - ) - for _, record := range records { - curnode = &newnode - resolved, err := DeserializeNode(record.Value, depth) - if err != nil { - return nil, err - } - // It's not needed to resurrect all nodes, nodes - // not along the path of what has been asked can - // be updated as expired. This is for v2.
- for i, b := range record.Path { - var child BinaryNode - if b == 0 { - child = curnode.left - } else { - child = curnode.right - } - if child == nil { - if i < len(record.Path)-1 { - child = &InternalNode{depth: depth} - } else { - // Not good, I need to update the pointer - child = resolved - } - } - depth++ - } - } - return &newnode, nil -} - -func (n *expiredNode) Get(key []byte, resolver NodeResolverFn) ([]byte, error) { - if n.archiveResolver == nil { - return nil, archive.ErrNoResolver - } - records, err := n.archiveResolver(n.Offset, n.Size) - if err != nil { - return nil, fmt.Errorf("failed to resolve expired node: %w", err) - } - - resolved, err := archiveRecordsToNode(records, n.depth) - if err != nil { - return nil, fmt.Errorf("failed to deserialize expired node: %w", err) - } - return resolved.Get(key, resolver) -} - -func (n *expiredNode) Insert(key, value []byte, resolver NodeResolverFn, depth int) (BinaryNode, error) { - if n.archiveResolver == nil { - return nil, archive.ErrNoResolver - } - blob, err := n.archiveResolver(n.Offset, n.Size) - if err != nil { - return nil, fmt.Errorf("failed to resolve expired node: %w", err) - } - resolved, err := archiveRecordsToNode(blob, n.depth) - if err != nil { - return nil, fmt.Errorf("failed to deserialize expired node: %w", err) - } - return resolved.Insert(key, value, resolver, depth) -} - -func (n *expiredNode) Copy() BinaryNode { - return &expiredNode{ - Offset: n.Offset, - Size: n.Size, - depth: n.depth, - archiveResolver: n.archiveResolver, - } -} - -func (n *expiredNode) Hash() common.Hash { - return common.Hash{} -} - -func (n *expiredNode) GetValuesAtStem(stem []byte, resolver NodeResolverFn) ([][]byte, error) { - if n.archiveResolver == nil { - return nil, archive.ErrNoResolver - } - blob, err := n.archiveResolver(n.Offset, n.Size) - if err != nil { - return nil, fmt.Errorf("failed to resolve expired node: %w", err) - } - resolved, err := archiveRecordsToNode(blob, n.depth) - if err != nil { - 
return nil, fmt.Errorf("failed to deserialize expired node: %w", err) - } - return resolved.GetValuesAtStem(stem, resolver) -} - -func (n *expiredNode) InsertValuesAtStem(stem []byte, values [][]byte, resolver NodeResolverFn, depth int) (BinaryNode, error) { - if n.archiveResolver == nil { - return nil, archive.ErrNoResolver - } - blob, err := n.archiveResolver(n.Offset, n.Size) - if err != nil { - return nil, fmt.Errorf("failed to resolve expired node: %w", err) - } - resolved, err := archiveRecordsToNode(blob, n.depth) - if err != nil { - return nil, fmt.Errorf("failed to deserialize expired node: %w", err) - } - return resolved.InsertValuesAtStem(stem, values, resolver, depth) -} - -func (n *expiredNode) CollectNodes(path []byte, flushfn NodeFlushFn) error { - return nil -} - -func (n *expiredNode) toDot(parent, path string) string { - me := fmt.Sprintf("expired%s", path) - ret := fmt.Sprintf("%s [label=\"EXPIRED: offset=%d\"]\n", me, n.Offset) - if len(parent) > 0 { - ret = fmt.Sprintf("%s %s -> %s\n", ret, parent, me) - } - return ret -} - -func (n *expiredNode) GetHeight() int { - return 0 -} - -// SetArchiveResolver sets the resolver function for this expired node. -func (n *expiredNode) SetArchiveResolver(resolver archive.ResolverFn) { - n.archiveResolver = resolver -} - -// Depth returns the depth of this node in the trie. -func (n *expiredNode) Depth() int { - return n.depth -} diff --git a/trie/bintrie/expired_node_test.go b/trie/bintrie/expired_node_test.go deleted file mode 100644 index ca9a7548cb..0000000000 --- a/trie/bintrie/expired_node_test.go +++ /dev/null @@ -1,277 +0,0 @@ -// Copyright 2026 go-ethereum Authors -// This file is part of the go-ethereum library. -// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. 
-// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. - -package bintrie - -import ( - "bytes" - "errors" - "testing" - - "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/trie/archive" -) - -func TestExpiredNodeSerializeDeserialize(t *testing.T) { - testCases := []struct { - offset uint64 - size uint64 - }{ - {0, 0}, - {1, 100}, - {255, 1024}, - {256, 4096}, - {1 << 16, 1 << 20}, - {1 << 32, 1 << 32}, - {1<<64 - 1, 1<<64 - 1}, - } - - for _, tc := range testCases { - original := &expiredNode{Offset: tc.offset, Size: tc.size, depth: 5} - serialized := SerializeNode(original) - - deserialized, err := DeserializeNode(serialized, 5) - if err != nil { - t.Fatalf("failed to deserialize expired node with offset %d, size %d: %v", tc.offset, tc.size, err) - } - - expNode, ok := deserialized.(*expiredNode) - if !ok { - t.Fatalf("deserialized node is not an expired node, got %T", deserialized) - } - - if expNode.Offset != original.Offset { - t.Errorf("offset mismatch: got %d, want %d", expNode.Offset, original.Offset) - } - - if expNode.Size != original.Size { - t.Errorf("size mismatch: got %d, want %d", expNode.Size, original.Size) - } - - if expNode.depth != original.depth { - t.Errorf("depth mismatch: got %d, want %d", expNode.depth, original.depth) - } - } -} - -func TestExpiredNodeSerializedFormat(t *testing.T) { - node := &expiredNode{Offset: 0x0102030405060708, Size: 0x1112131415161718, depth: 0} - serialized := SerializeNode(node) - - expected := []byte{ - nodeTypeExpired, - 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, - 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, - } - if
!bytes.Equal(serialized, expected) { - t.Errorf("serialized format mismatch: got %x, want %x", serialized, expected) - } -} - -func TestExpiredNodeSerializedSize(t *testing.T) { - node := &expiredNode{Offset: 12345, Size: 6789, depth: 0} - serialized := SerializeNode(node) - - if len(serialized) != NodeTypeBytes+2*archive.OffsetSize { - t.Errorf("serialized size mismatch: got %d, want %d", len(serialized), NodeTypeBytes+2*archive.OffsetSize) - } -} - -func TestExpiredNodeInvalidLength(t *testing.T) { - invalidCases := [][]byte{ - {nodeTypeExpired}, - {nodeTypeExpired, 0x01}, - {nodeTypeExpired, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, - {nodeTypeExpired, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f}, - {nodeTypeExpired, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11}, - } - - for _, buf := range invalidCases { - _, err := DeserializeNode(buf, 0) - if err == nil { - t.Errorf("expected error for buffer length %d, got nil", len(buf)) - } - } -} - -func TestExpiredNodeHash(t *testing.T) { - node := &expiredNode{Offset: 100, depth: 5} - hash := node.Hash() - - if hash != (common.Hash{}) { - t.Errorf("expected zero hash, got %x", hash) - } -} - -func TestExpiredNodeGetHeight(t *testing.T) { - node := &expiredNode{Offset: 100, depth: 5} - height := node.GetHeight() - - if height != 0 { - t.Errorf("expected height 0, got %d", height) - } -} - -func TestExpiredNodeCollectNodes(t *testing.T) { - node := &expiredNode{Offset: 100, depth: 5} - called := false - err := node.CollectNodes(nil, func(path []byte, n BinaryNode) { - called = true - }) - - if err != nil { - t.Errorf("unexpected error: %v", err) - } - if called { - t.Error("flush function should not be called for expired nodes") - } -} - -func TestExpiredNodeToDot(t *testing.T) { - node := &expiredNode{Offset: 12345, depth: 5} - dot := node.toDot("parent", "path") - - if dot == "" { - t.Error("toDot should return 
non-empty string") - } -} - -func TestExpiredNodeCopy(t *testing.T) { - resolver := func(offset, size uint64) ([]*archive.Record, error) { - return nil, nil - } - - original := &expiredNode{ - Offset: 12345, - Size: 6789, - depth: 5, - archiveResolver: resolver, - } - - copied := original.Copy() - copiedExp, ok := copied.(*expiredNode) - if !ok { - t.Fatalf("copied node is not an expired node, got %T", copied) - } - - if copiedExp.Offset != original.Offset { - t.Errorf("offset mismatch: got %d, want %d", copiedExp.Offset, original.Offset) - } - - if copiedExp.Size != original.Size { - t.Errorf("size mismatch: got %d, want %d", copiedExp.Size, original.Size) - } - - if copiedExp.depth != original.depth { - t.Errorf("depth mismatch: got %d, want %d", copiedExp.depth, original.depth) - } - - if copiedExp.archiveResolver == nil { - t.Error("archive resolver was not copied") - } -} - -func TestExpiredNodeNoResolver(t *testing.T) { - node := &expiredNode{Offset: 100, depth: 5} - - _, err := node.Get(make([]byte, 32), nil) - if !errors.Is(err, archive.ErrNoResolver) { - t.Errorf("Get: expected archive.ErrNoResolver, got %v", err) - } - - _, err = node.Insert(make([]byte, 32), make([]byte, 32), nil, 0) - if !errors.Is(err, archive.ErrNoResolver) { - t.Errorf("Insert: expected archive.ErrNoResolver, got %v", err) - } - - _, err = node.GetValuesAtStem(make([]byte, StemSize), nil) - if !errors.Is(err, archive.ErrNoResolver) { - t.Errorf("GetValuesAtStem: expected archive.ErrNoResolver, got %v", err) - } - - _, err = node.InsertValuesAtStem(make([]byte, StemSize), make([][]byte, StemNodeWidth), nil, 0) - if !errors.Is(err, archive.ErrNoResolver) { - t.Errorf("InsertValuesAtStem: expected archive.ErrNoResolver, got %v", err) - } -} - -func TestExpiredNodeWithResolver(t *testing.T) { - var key [32]byte - copy(key[:StemSize], make([]byte, StemSize)) - key[StemSize] = 0 - - var values [StemNodeWidth][]byte - values[0] = make([]byte, HashSize) - copy(values[0], []byte("testvalue")) 
- - stemNode := &StemNode{ - Stem: key[:StemSize], - Values: values[:], - depth: 5, - } - serializedStem := SerializeNode(stemNode) - - resolver := func(offset, size uint64) ([]*archive.Record, error) { - if offset == 100 { - return []*archive.Record{{Value: serializedStem}}, nil - } - return nil, errors.New("unknown offset") - } - - node := &expiredNode{ - Offset: 100, - Size: uint64(len(serializedStem)), - depth: 5, - archiveResolver: resolver, - } - - vals, err := node.GetValuesAtStem(key[:StemSize], nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if vals == nil { - t.Fatal("expected non-nil values") - } - - if !bytes.HasPrefix(vals[0], []byte("testvalue")) { - t.Errorf("value mismatch: got %q", vals[0]) - } -} - -func TestExpiredNodeDepth(t *testing.T) { - node := &expiredNode{Offset: 100, depth: 42} - if node.Depth() != 42 { - t.Errorf("expected depth 42, got %d", node.Depth()) - } -} - -func TestExpiredNodeSetArchiveResolver(t *testing.T) { - node := &expiredNode{Offset: 100, depth: 5} - - if node.archiveResolver != nil { - t.Error("expected nil archive resolver initially") - } - - resolver := func(offset, size uint64) ([]*archive.Record, error) { - return nil, nil - } - node.SetArchiveResolver(resolver) - - if node.archiveResolver == nil { - t.Error("expected non-nil archive resolver after setting") - } -}