go-ethereum/trie/expired_node.go
Emualliug Tellab 2ee9408be5 trie/archiver: streaming archival + pathdb journal consistency (#567)
* trie/archiver: streaming subtree archival to fix OOM

Replace the recursive approach that loaded the entire trie into memory
with a streaming NodeIterator-based approach:

- processTrie now uses NodeIterator to walk the trie node-by-node
- probeHeight reads nodes from raw DB, computes height bounded at 3,
  and discards decoded nodes immediately (no in-memory trie buildup)
- collectSubtree only materializes the bounded height-3 subtree being
  archived (at most ~4096 nodes)
- Memory usage: O(iterator_stack) + O(current_subtree) instead of
  O(entire_trie)

This fixes OOM kills on large storage tries (e.g. contracts with
millions of storage slots) where the previous approach would load
all nodes and subtreeInfo into memory before archiving any of them.

* cmd/geth: flush diff layers before archiving, re-journal after

Instead of deleting the pathdb journal after archive generation (which
breaks the chain head and prevents block imports), properly integrate
with pathdb:

1. Open triedb with pathdb support (not just raw KV)
2. Disable state history freezer (avoid append gaps)
3. Flush all diff layers to disk via Commit() before archiving
4. After archiving, re-journal the pathdb state (disk layer only)

This ensures geth can restart cleanly after archiving and continue
importing blocks without 'unknown ancestor' errors.

* trie: add resurrection timing and depth metrics to expired node resolution

* Update trie/archiver.go

---------

Co-authored-by: Guillaume Ballet <3272758+gballet@users.noreply.github.com>
2026-06-11 13:02:04 +02:00

262 lines
8 KiB
Go

// Copyright 2026 go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package trie
import (
"bytes"
"encoding/binary"
"fmt"
"time"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/trie/archive"
)
// expiredNodeMarker is the marker byte that identifies a serialized expired
// node; expiredNode.encode writes it as the first byte of its output.
// 0x00 is used because valid MPT nodes are always RLP lists (starting with
// 0xc0+), so the marker cannot collide with a regular node encoding.
const expiredNodeMarker = 0x00
// expiredNode represents a node whose data has been archived.
// It stores the file offset and size of the archived data.
type expiredNode struct {
// offset is the archive file offset of the archived data; it is handed to
// the archive resolver when the node is resurrected.
offset uint64
// size is the extent of the archived data, handed to the archive resolver
// together with offset (presumably a byte count — confirm against the
// archive package).
size uint64
// cachedHash is the hash of the original subtree, if known. When non-nil,
// resolveExpiredNodeData checks the rebuilt subtree against it.
cachedHash hashNode
// archiveResolver is assigned through SetArchiveResolver.
// NOTE(review): nothing in this file reads it; resolveExpiredNodeData calls
// archive.ArchivedNodeResolver directly — confirm that is intended.
archiveResolver archive.ResolverFn
}
// cache returns the hash recorded for the archived subtree together with a
// flag that is true exactly when no hash is recorded.
// NOTE(review): the flag presumably plays the role of the "dirty" result of
// the node interface's cache method — confirm against the interface.
func (n *expiredNode) cache() (hashNode, bool) {
	known := n.cachedHash
	return known, known == nil
}
// encode writes the serialized form of the expired node to w: the
// expiredNodeMarker byte, followed by the offset and then the size, each as a
// big-endian uint64 placed archive.OffsetSize bytes apart. The result of
// w.Write is not inspected.
func (n *expiredNode) encode(w rlp.EncoderBuffer) {
	const (
		offsetPos = 1
		sizePos   = offsetPos + archive.OffsetSize
		total     = sizePos + archive.OffsetSize
	)
	var raw [total]byte
	raw[0] = expiredNodeMarker
	binary.BigEndian.PutUint64(raw[offsetPos:], n.offset)
	binary.BigEndian.PutUint64(raw[sizePos:], n.size)
	w.Write(raw[:])
}
// fstring renders the expired node as a short human-readable string showing
// its archive offset and size. The ind argument is not used.
func (n *expiredNode) fstring(ind string) string {
	const layout = "<expired: offset=%d, size=%d> "
	return fmt.Sprintf(layout, n.offset, n.size)
}
// Offset reports where in the archive file this expired node's data lives.
func (n *expiredNode) Offset() uint64 { return n.offset }
// SetArchiveResolver stores resolver on the expired node, replacing any
// resolver that was set before.
func (n *expiredNode) SetArchiveResolver(resolver archive.ResolverFn) { n.archiveResolver = resolver }
// resolveExpiredNodeData resurrects the subtree behind an expired node.
//
// It fetches the archive records for (n.offset, n.size), rebuilds the subtree
// from them, logs timing and depth at debug level and, when the original hash
// is known, verifies that the rebuilt subtree hashes to it. On success the
// whole subtree is marked dirty and returned.
//
// An error is returned if the archive lookup fails, if the records cannot be
// turned into a subtree, or if the recomputed hash differs from the cached
// one (which means the archive data is corrupted).
//
// NOTE(review): the lookup goes through archive.ArchivedNodeResolver; the
// resolver stored on n via SetArchiveResolver is not consulted here.
func resolveExpiredNodeData(n *expiredNode) (node, error) {
	began := time.Now()

	recs, err := archive.ArchivedNodeResolver(n.offset, n.size)
	if err != nil {
		return nil, fmt.Errorf("failed to resolve expired node: %w", err)
	}
	subtree, err := archiveRecordsToNode(recs)
	if err != nil {
		return nil, fmt.Errorf("failed to rebuild expired node from archive: %w", err)
	}

	log.Debug("Resurrected expired node from archive",
		"offset", n.offset,
		"archiveBytes", n.size,
		"records", len(recs),
		"depth", subtreeDepth(subtree),
		"elapsed", time.Since(began))

	// Integrity check, only possible when the original hash was retained.
	// A mismatch means the archive is corrupted (e.g. leaves went missing
	// because hashNodes could not be resolved during archival), so nothing
	// read from it can be trusted.
	if want := n.cachedHash; want != nil {
		hasher := newHasher(false)
		got := hasher.hash(subtree, true)
		returnHasherToPool(hasher)

		if !bytes.Equal(got, want) {
			return nil, fmt.Errorf("expired node hash mismatch at offset=%d size=%d: archive data is corrupted (expected %x got %x, %d records)",
				n.offset, n.size, []byte(want), got, len(recs))
		}
		// Put the original hash on the subtree root so that the hasher can
		// hand it back directly instead of computing it again.
		if branch, ok := subtree.(*fullNode); ok {
			branch.flags.hash = want
		} else if short, ok := subtree.(*shortNode); ok {
			short.flags.hash = want
		}
	}

	// Every resolved node must be dirty. With pathdb's diff layers the
	// committer only collects dirty nodes into the NodeSet that becomes the
	// next diff layer. Clean, resolved-but-untouched siblings inside this
	// subtree would otherwise live nowhere: not in a diff layer (clean) and
	// not in the raw DB (removed by the archiver). Later lookups from higher
	// diff layers would fall through to the disk layer, find nothing, and
	// fail with MissingNodeError.
	//
	// Tries that are only read and never committed are unaffected: the
	// flagged nodes are simply dropped when the trie is garbage collected.
	markSubtreeDirty(subtree)
	return subtree, nil
}
// subtreeDepth reports how many fullNode/shortNode levels lie on the longest
// path below (and including) n. Any other node kind, including nil, counts as
// depth 0.
func subtreeDepth(n node) int {
	if short, ok := n.(*shortNode); ok {
		return subtreeDepth(short.Val) + 1
	}
	branch, ok := n.(*fullNode)
	if !ok {
		return 0
	}
	// A branch is one level deeper than its deepest non-nil child.
	deepest := 0
	for i := range branch.Children {
		child := branch.Children[i]
		if child == nil {
			continue
		}
		if d := subtreeDepth(child); d > deepest {
			deepest = d
		}
	}
	return deepest + 1
}
// markSubtreeDirty walks the subtree rooted at n and sets the dirty flag on
// every fullNode and shortNode it reaches. Only the dirty flag is written, so
// cached hashes stay in place. The purpose is to make the committer pick
// these nodes up in the NodeSet during trie commit.
func markSubtreeDirty(n node) {
	switch cur := n.(type) {
	case *shortNode:
		cur.flags.dirty = true
		markSubtreeDirty(cur.Val)
	case *fullNode:
		cur.flags.dirty = true
		// Only the first 16 child slots are descended into.
		for i := 0; i < 16; i++ {
			if child := cur.Children[i]; child != nil {
				markSubtreeDirty(child)
			}
		}
	}
	// Other node kinds (valueNode, hashNode, nil) carry no flags.
}
// archiveRecordsToNode rebuilds a trie subtree from archive records by
// inserting each record's value at its (validated and terminator-normalized)
// path, in slice order.
//
// The root starts out nil and grows through insertTrieNode, so the resulting
// shape is whatever the inserts produce; seeding it with a fullNode would be
// wrong when the original subtree root was a shortNode (shared prefix).
//
// It returns archive.EmptyArchiveRecord for an empty record list, and the
// first validation, normalization or insertion error otherwise.
func archiveRecordsToNode(records []*archive.Record) (node, error) {
	if len(records) == 0 {
		return nil, archive.EmptyArchiveRecord
	}
	var (
		root node
		err  error
	)
	for idx := range records {
		rec := records[idx]
		if err = validateRecordPath(rec.Path); err != nil {
			return nil, err
		}
		var key []byte
		if key, err = normalizeRecordKey(rec.Path); err != nil {
			return nil, err
		}
		if len(key) == 0 {
			return nil, fmt.Errorf("empty key in record #%d", idx)
		}
		if root, err = insertTrieNode(root, key, valueNode(rec.Value)); err != nil {
			return nil, err
		}
	}
	return root, nil
}
// validateRecordPath checks that path is a well-formed hex-nibble path: every
// element must be at most 16, and the value 16 (the terminator) may only
// appear as the final element. Elements are checked front to back and the
// first violation is reported. An empty path is valid.
func validateRecordPath(path []byte) error {
	last := len(path) - 1
	for pos := 0; pos <= last; pos++ {
		switch nibble := path[pos]; {
		case nibble > 16:
			return fmt.Errorf("invalid nibble in record path: %d", nibble)
		case nibble == 16 && pos < last:
			return fmt.Errorf("terminator nibble in middle of record path")
		}
	}
	return nil
}
// normalizeRecordKey turns a record path into a key usable for leaf
// insertion, i.e. one that ends in the terminator nibble (16):
//   - an empty path becomes a key consisting of the terminator alone;
//   - a path for which hasTerm reports true is returned as-is (the same
//     slice, not a copy);
//   - any other path is copied and the terminator is appended to the copy,
//     leaving the caller's slice untouched.
//
// The returned error is always nil in the current implementation.
func normalizeRecordKey(path []byte) ([]byte, error) {
	switch {
	case len(path) == 0:
		return []byte{16}, nil
	case hasTerm(path):
		return path, nil
	}
	key := make([]byte, len(path), len(path)+1)
	copy(key, path)
	return append(key, 16), nil
}
// insertTrieNode inserts value at the hex-nibble path key below n and returns
// the root of the updated subtree. It is used to rebuild an archived subtree
// leaf by leaf, starting from a nil root.
//
// Behaviour per node kind:
//   - nil: a new shortNode holding the whole remaining key is created.
//   - *shortNode: if the node's key is fully shared with key, the insert
//     descends into its child and a fresh shortNode is returned; otherwise
//     the node is split at the point of divergence into a fullNode, wrapped
//     in a shortNode for the shared prefix when that prefix is non-empty.
//   - *fullNode: the insert descends into the child selected by the first
//     nibble of key and the node is updated in place.
//
// An empty key returns value itself, replacing whatever was at n.
//
// An error is returned when n is of any other node kind, when key ends inside
// an existing short node's key (there is no nibble left to branch on), or
// when a nibble does not address a child slot of a fullNode. The latter two
// conditions are reported as errors instead of surfacing as index-out-of-range
// panics.
func insertTrieNode(n node, key []byte, value node) (node, error) {
	if len(key) == 0 {
		return value, nil
	}
	switch n := n.(type) {
	case *shortNode:
		matchlen := prefixLen(key, n.Key)
		if matchlen == len(n.Key) {
			// The short node's key is a prefix of key: keep it and insert
			// the remainder below.
			nn, err := insertTrieNode(n.Val, key[matchlen:], value)
			if err != nil {
				return nil, err
			}
			return &shortNode{Key: n.Key, Val: nn}, nil
		}
		// The paths diverge inside n.Key, so both must provide a nibble at
		// the divergence point to pick their branch slot.
		if matchlen >= len(key) {
			return nil, fmt.Errorf("key %x ends inside existing path %x in trie insert", key, n.Key)
		}
		branch := &fullNode{}
		oldNibble, newNibble := n.Key[matchlen], key[matchlen]
		if slots := len(branch.Children); int(oldNibble) >= slots || int(newNibble) >= slots {
			return nil, fmt.Errorf("invalid nibble in trie insert: existing=%d new=%d", oldNibble, newNibble)
		}
		var err error
		branch.Children[oldNibble], err = insertTrieNode(nil, n.Key[matchlen+1:], n.Val)
		if err != nil {
			return nil, err
		}
		branch.Children[newNibble], err = insertTrieNode(nil, key[matchlen+1:], value)
		if err != nil {
			return nil, err
		}
		if matchlen == 0 {
			// Nothing in common: the branch replaces the short node outright.
			return branch, nil
		}
		return &shortNode{Key: key[:matchlen], Val: branch}, nil
	case *fullNode:
		if int(key[0]) >= len(n.Children) {
			return nil, fmt.Errorf("invalid nibble in trie insert: %d", key[0])
		}
		child, err := insertTrieNode(n.Children[key[0]], key[1:], value)
		if err != nil {
			return nil, err
		}
		n.Children[key[0]] = child
		return n, nil
	case nil:
		return &shortNode{Key: key, Val: value}, nil
	default:
		return nil, fmt.Errorf("invalid node type in trie insert: %T", n)
	}
}