This commit is contained in:
Guillaume Ballet 2026-06-18 19:54:12 +00:00 committed by GitHub
commit 8b6f607c4e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 2473 additions and 13 deletions

576
cmd/geth/archivecmd.go Normal file
View file

@ -0,0 +1,576 @@
// Copyright 2026 The go-ethereum Authors
// This file is part of go-ethereum.
//
// go-ethereum is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// go-ethereum is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with go-ethereum. If not, see <http://www.gnu.org/licenses/>.
package main
import (
"encoding/binary"
"errors"
"fmt"
"os"
"path/filepath"
"slices"
"time"
"github.com/ethereum/go-ethereum/cmd/utils"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/trie"
"github.com/ethereum/go-ethereum/trie/archive"
"github.com/ethereum/go-ethereum/triedb/database"
"github.com/urfave/cli/v2"
)
var (
// Flags for the "archive generate" subcommand.
archiveOutputFlag = &cli.StringFlag{
Name: "output",
Usage: "Path to archive output file",
Value: "", // Empty means archiveGenerate picks the default: "nodearchive" under the node's resolved instance directory (<datadir>/geth/nodearchive).
}
archiveCompactionIntervalFlag = &cli.Uint64Flag{
Name: "compaction-interval",
Usage: "Run compaction after this many subtrees (0 = disable)",
Value: 1000,
}
archiveDryRunFlag = &cli.BoolFlag{
Name: "dry-run",
Usage: "Simulate without modifying database",
}
// Flags for the "archive check-node" subcommand.
archiveCheckNodeFlag = &cli.StringFlag{
Name: "owner",
Usage: "Owner hash (hex) for the trie node to check",
}
archiveCheckPathFlag = &cli.StringFlag{
Name: "path",
Usage: "Path (hex nibbles) of the trie node to check",
}
// archiveCommand is the top-level "geth archive" command. It has no action
// of its own and only groups the subcommands declared below.
archiveCommand = &cli.Command{
Name: "archive",
Usage: "Archive state trie nodes to reduce database size",
Subcommands: []*cli.Command{
archiveGenerateCmd,
archiveVerifyCmd,
archiveDeleteJournalCmd,
archiveCheckNodeCmd,
},
}
// archiveCheckNodeCmd inspects a single trie node, its path prefixes and its
// direct children in the raw database (see archiveCheckNode).
archiveCheckNodeCmd = &cli.Command{
Name: "check-node",
Usage: "Check if a specific trie node exists in the raw DB",
Action: archiveCheckNode,
Flags: slices.Concat([]cli.Flag{
archiveCheckNodeFlag,
archiveCheckPathFlag,
}, utils.NetworkFlags, utils.DatabaseFlags),
}
// archiveDeleteJournalCmd removes the pathdb journal key and journal files
// (see archiveDeleteJournal).
archiveDeleteJournalCmd = &cli.Command{
Name: "delete-journal",
Usage: "Delete the pathdb journal to force a clean restart",
Action: archiveDeleteJournal,
Flags: slices.Concat(utils.NetworkFlags, utils.DatabaseFlags),
Description: `
Deletes the pathdb journal (TrieJournal key and merkle.journal file) from the
database. This forces geth to restart with a bare disk layer, discarding any
in-memory diff layers that may be inconsistent with archived state.
Use this after running 'archive generate' if geth was started in between and
recreated the journal.
Examples:
geth archive delete-journal --datadir /path/to/datadir
geth archive delete-journal --hoodi
`,
}
// archiveVerifyCmd walks the account trie and every referenced storage trie
// on a read-only database (see archiveVerify).
archiveVerifyCmd = &cli.Command{
Name: "verify",
Usage: "Verify all archived nodes can be correctly resurrected",
Action: archiveVerify,
Flags: slices.Concat(utils.NetworkFlags, utils.DatabaseFlags),
Description: `
Walks the entire state trie, resolving every expired node from the archive
file and verifying that the reconstructed subtree hash matches the original.
Also walks all storage tries referenced by accounts.
The database is opened read-only. No modifications are made.
Examples:
geth archive verify --datadir /path/to/datadir
geth archive verify --hoodi
`,
}
// archiveGenerateCmd archives height-3 subtrees of the on-disk state trie
// (see archiveGenerate).
// NOTE(review): ArgsUsage advertises an optional [state-root] argument, but
// archiveGenerate never reads positional arguments — confirm which is intended.
archiveGenerateCmd = &cli.Command{
Name: "generate",
Usage: "Generate archive files from height-3 subtrees",
ArgsUsage: "[state-root]",
Action: archiveGenerate,
Flags: slices.Concat([]cli.Flag{
archiveOutputFlag,
archiveCompactionIntervalFlag,
archiveDryRunFlag,
}, utils.NetworkFlags, utils.DatabaseFlags),
Description: `
Walks the state trie of the specified root (or head block) and archives
subtrees at height 3. Each archived subtree is replaced with an expiredNode
that references the archive file offset and size.
Height is measured from leaves: leaves=0, parents=1, etc. A height-3 node
has leaves at most 3 levels below it.
The archiver reads trie nodes directly from the persistent database layer,
bypassing any in-memory diff layers. This ensures consistency between the
data it reads and the data it modifies.
Examples:
# Archive from the persistent disk state
geth archive generate --datadir /path/to/datadir
# Dry run to see what would be archived
geth archive generate --dry-run --datadir /path/to/datadir
# Custom output and compaction interval
geth archive generate --output /path/to/archive --compaction-interval 500
`,
}
)
// rawDBNodeReader is a database.NodeReader backed directly by the raw
// key-value store. It never consults pathdb's in-memory diff layers, so the
// archiver reads exactly the on-disk trie that it subsequently rewrites.
type rawDBNodeReader struct {
	db ethdb.KeyValueReader
}

// Node returns the stored blob for the trie node at path. A zero owner selects
// the account trie; any other owner selects that account's storage trie.
//
// The hash argument is deliberately not checked against the blob: the raw DB
// may hold expiredNode markers (blob[0] == 0x00), whose hash differs from that
// of the node they replaced. The returned error is always nil.
func (r *rawDBNodeReader) Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, error) {
	if owner != (common.Hash{}) {
		return rawdb.ReadStorageTrieNode(r.db, owner, path), nil
	}
	return rawdb.ReadAccountTrieNode(r.db, path), nil
}
// rawDBNodeDatabase is a database.NodeDatabase that hands out raw-DB readers
// for exactly one state root: the root currently persisted on disk.
type rawDBNodeDatabase struct {
	db   ethdb.KeyValueReader
	root common.Hash
}

// NodeReader returns a reader over the raw database when stateRoot equals the
// configured disk root, and an error for any other root.
func (d *rawDBNodeDatabase) NodeReader(stateRoot common.Hash) (database.NodeReader, error) {
	if stateRoot == d.root {
		return &rawDBNodeReader{db: d.db}, nil
	}
	return nil, fmt.Errorf("raw DB reader only supports disk root %x, got %x", d.root, stateRoot)
}
// archiveGenerate implements "geth archive generate". It flushes pathdb diff
// layers to disk, derives the on-disk state root, runs the trie archiver over
// that root and finally re-journals pathdb so geth can restart cleanly.
//
// With --dry-run the chain database is opened read-only, no archive file is
// opened, and neither the diff-layer flush, the final compaction nor the
// re-journal step is performed.
func archiveGenerate(ctx *cli.Context) error {
	// 1. Setup node and databases
	stack, _ := makeConfigNode(ctx)
	defer stack.Close()

	dryRun := ctx.Bool(archiveDryRunFlag.Name)
	chaindb := utils.MakeChainDatabase(ctx, stack, dryRun)
	defer chaindb.Close()

	// Check state scheme - we only support PathDB
	scheme := cycleCheckScheme(ctx, chaindb)
	if scheme != rawdb.PathScheme {
		return fmt.Errorf("archive generation requires path-based state scheme, got: %s", scheme)
	}

	// 2. Flush diff layers to disk via pathdb. This ensures the raw DB
	// contains the complete, up-to-date state trie and that state history
	// entries are properly written to the freezer.
	//
	// Flushing writes to the database, so it is skipped in dry-run mode: a
	// dry run must not modify anything, and the database was opened
	// read-only above. The simulation then reflects the persisted disk
	// state only.
	trieDB := utils.MakeTrieDatabase(ctx, stack, chaindb, false, dryRun, false)
	head, hasDiff := trieDB.DiffHead()
	switch {
	case hasDiff && dryRun:
		log.Warn("Dry run: diff layers are not flushed, simulating against persisted disk state", "head", head)
	case hasDiff:
		log.Info("Flushing diff layers to disk", "head", head)
		if err := trieDB.Commit(head, true); err != nil {
			trieDB.Close()
			return fmt.Errorf("failed to flush diff layers: %w", err)
		}
		log.Info("Diff layers flushed successfully")
	default:
		log.Info("No diff layers to flush, disk state is current", "root", head)
	}
	// Close triedb — we work directly with raw DB for archival.
	// We'll re-open it at the end to write a fresh journal.
	trieDB.Close()

	// 3. Determine the disk state root from the persisted account trie root
	// node.
	rootBlob := rawdb.ReadAccountTrieNode(chaindb, nil)
	if len(rootBlob) == 0 {
		return errors.New("state trie not found in database")
	}
	root := crypto.Keccak256Hash(rootBlob)
	log.Info("Using disk state root", "root", root)

	// Create a raw DB node reader that bypasses pathdb layers
	nodeDB := &rawDBNodeDatabase{db: chaindb, root: root}

	// 4. Open archive writer (unless dry-run).
	// The archive file is placed at <datadir>/geth/nodearchive by default,
	// matching the path used by ArchivedNodeResolver when reading back.
	var writer *archive.ArchiveWriter
	archivePath := ctx.String(archiveOutputFlag.Name)
	if archivePath == "" {
		archivePath = filepath.Join(stack.ResolvePath(""), "nodearchive")
	}
	if !dryRun {
		var err error
		writer, err = archive.NewArchiveWriter(archivePath)
		if err != nil {
			return fmt.Errorf("failed to open archive file %s: %w", archivePath, err)
		}
		defer writer.Close()
		log.Info("Opened archive file", "path", archivePath)
	} else {
		log.Info("Dry run mode - no changes will be made")
	}

	// 5. Create and run archiver
	archiver := trie.NewArchiver(
		chaindb,
		nodeDB,
		writer,
		ctx.Uint64(archiveCompactionIntervalFlag.Name),
		dryRun,
	)
	start := time.Now()
	if err := archiver.ProcessState(root); err != nil {
		return fmt.Errorf("archive generation failed: %w", err)
	}

	// 6. Get stats and optionally run final compaction. A compaction failure
	// is reported but does not fail the command.
	subtrees, leaves, bytesDeleted := archiver.Stats()
	if !dryRun && subtrees > 0 {
		log.Info("Running final database compaction")
		if err := chaindb.Compact(nil, nil); err != nil {
			log.Warn("Final compaction failed", "err", err)
		}
	}

	// 7. Re-journal the pathdb state with the current disk root.
	// After archiving, some trie nodes have been replaced with expired
	// markers. We re-open pathdb and write a fresh journal (disk layer
	// only, since all diff layers were flushed in step 2) so that geth
	// can restart cleanly.
	if !dryRun {
		log.Info("Re-journaling pathdb state")
		freshTrieDB := utils.MakeTrieDatabase(ctx, stack, chaindb, false, false, false)
		freshRoot := crypto.Keccak256Hash(rawdb.ReadAccountTrieNode(chaindb, nil))
		if err := freshTrieDB.Journal(freshRoot); err != nil {
			log.Warn("Failed to re-journal pathdb state", "err", err)
		}
		freshTrieDB.Close()
	}

	// 8. Print summary. The writer is nil in dry-run mode, in which case the
	// reported archive size stays zero.
	var archiveSize uint64
	if writer != nil {
		archiveSize = writer.Offset()
	}
	log.Info("Archive generation complete",
		"subtrees", subtrees,
		"leaves", leaves,
		"bytesDeleted", bytesDeleted,
		"archiveSize", archiveSize,
		"elapsed", common.PrettyDuration(time.Since(start)))
	if dryRun {
		log.Info("This was a dry run - no changes were made to the database")
	}
	return nil
}
// archiveVerify implements "geth archive verify". It opens the chain database
// read-only, walks the on-disk account trie and, for every account with a
// non-empty storage root, walks that storage trie as well. Per-account
// failures are counted and logged without aborting the walk; the command
// returns an error at the end if any were seen.
func archiveVerify(ctx *cli.Context) error {
	stack, _ := makeConfigNode(ctx)
	defer stack.Close()

	// Open database read-only
	chaindb := utils.MakeChainDatabase(ctx, stack, true)
	defer chaindb.Close()

	scheme := cycleCheckScheme(ctx, chaindb)
	if scheme != rawdb.PathScheme {
		return fmt.Errorf("archive verify requires path-based state scheme, got: %s", scheme)
	}
	// Set archive data dir so ArchivedNodeResolver can find the file
	// ResolvePath("") returns the node's data directory (e.g. .ethereum/hoodi/geth),
	// but ArchivedNodeResolver expects the instance directory (.ethereum/hoodi)
	// since it appends "geth/nodearchive" itself.
	archive.ArchiveDataDir = filepath.Dir(stack.ResolvePath(""))

	// Compute disk root
	rootBlob := rawdb.ReadAccountTrieNode(chaindb, nil)
	if len(rootBlob) == 0 {
		return errors.New("state trie not found in database")
	}
	root := crypto.Keccak256Hash(rootBlob)
	log.Info("Verifying archived nodes", "root", root)

	nodeDB := &rawDBNodeDatabase{db: chaindb, root: root}

	// Open account trie
	accountTrie, err := trie.New(trie.StateTrieID(root), nodeDB)
	if err != nil {
		return fmt.Errorf("failed to open account trie: %w", err)
	}
	var (
		totalAccounts     int
		totalStorageTries int
		totalLeaves       int
		totalExpired      int
		totalErrors       int
		start             = time.Now()
		lastLog           = time.Now()
	)
	// Walk the account trie — this resolves all expired nodes and verifies hashes
	accountStats, err := accountTrie.Walk(func(path []byte, value []byte) error {
		totalAccounts++
		if time.Since(lastLog) > 30*time.Second {
			log.Info("Verification progress",
				"accounts", totalAccounts,
				"storageTries", totalStorageTries,
				"leaves", totalLeaves,
				"expired", totalExpired,
				"errors", totalErrors)
			lastLog = time.Now()
		}
		// Decode account to check for storage trie
		var acc types.StateAccount
		if err := rlp.DecodeBytes(value, &acc); err != nil {
			log.Warn("Failed to decode account", "err", err)
			totalErrors++
			return nil // continue walking
		}
		if acc.Root == types.EmptyRootHash {
			return nil
		}
		// The storage trie is owned by the account hash, which is recovered
		// from the leaf path. A malformed path is counted as an error rather
		// than silently padded/cropped into a wrong owner.
		accountHash, err := accountHashFromNibbles(path)
		if err != nil {
			log.Warn("Failed to derive account hash from trie path", "path", fmt.Sprintf("%x", path), "err", err)
			totalErrors++
			return nil
		}
		storageID := trie.StorageTrieID(root, accountHash, acc.Root)
		storageTrie, err := trie.New(storageID, nodeDB)
		if err != nil {
			log.Warn("Failed to open storage trie", "account", accountHash, "err", err)
			totalErrors++
			return nil
		}
		storageStats, err := storageTrie.Walk(func(spath []byte, svalue []byte) error {
			return nil
		})
		if err != nil {
			log.Warn("Storage trie walk failed", "account", accountHash, "err", err)
			totalErrors++
			return nil
		}
		totalStorageTries++
		totalLeaves += storageStats.Leaves
		totalExpired += storageStats.ExpiredResolved
		return nil
	})
	if err != nil {
		return fmt.Errorf("account trie walk failed: %w", err)
	}
	totalLeaves += accountStats.Leaves
	totalExpired += accountStats.ExpiredResolved

	log.Info("Archive verification complete",
		"accounts", totalAccounts,
		"storageTries", totalStorageTries,
		"totalLeaves", totalLeaves,
		"expiredResolved", totalExpired,
		"errors", totalErrors,
		"elapsed", common.PrettyDuration(time.Since(start)))
	if totalErrors > 0 {
		return fmt.Errorf("verification completed with %d errors", totalErrors)
	}
	return nil
}

// accountHashFromNibbles converts an account-trie leaf path, given as one
// nibble per byte with an optional trailing terminator (16), back into the
// 32-byte account hash. It returns an error instead of panicking or producing
// a truncated hash when the path does not hold exactly 64 nibbles.
func accountHashFromNibbles(path []byte) (common.Hash, error) {
	nibbles := path
	if n := len(nibbles); n > 0 && nibbles[n-1] == 16 {
		nibbles = nibbles[:n-1]
	}
	if len(nibbles) != 2*common.HashLength {
		return common.Hash{}, fmt.Errorf("unexpected account path length: have %d nibbles, want %d", len(nibbles), 2*common.HashLength)
	}
	var hash common.Hash
	for i := range hash {
		hash[i] = nibbles[2*i]<<4 | nibbles[2*i+1]
	}
	return hash, nil
}
// archiveDeleteJournal implements "geth archive delete-journal". It deletes
// the "TrieJournal" key from the chain database and removes merkle.journal and
// verkle.journal from both the "triedb" sub-directory and the root of the
// resolved node directory.
//
// Every deletion is attempted even if an earlier one fails. Journal files that
// do not exist are not an error. All real failures are returned together so
// the command does not report success when the journal is still in place.
func archiveDeleteJournal(ctx *cli.Context) error {
	stack, _ := makeConfigNode(ctx)
	defer stack.Close()

	chaindb := utils.MakeChainDatabase(ctx, stack, false)
	defer chaindb.Close()

	var errs []error

	// Delete the pathdb journal KV key
	if err := chaindb.Delete([]byte("TrieJournal")); err != nil {
		errs = append(errs, fmt.Errorf("deleting pathdb journal key: %w", err))
	} else {
		log.Info("Deleted pathdb journal key (TrieJournal)")
	}
	// Delete the journal file(s) - check both legacy and current locations
	for _, dir := range []string{"triedb", ""} {
		base := stack.ResolvePath(dir)
		if base == "" {
			// Joining an empty base would yield a path relative to the
			// current working directory; never delete files from there.
			continue
		}
		for _, name := range []string{"merkle.journal", "verkle.journal"} {
			journalFile := filepath.Join(base, name)
			switch err := os.Remove(journalFile); {
			case err == nil:
				log.Info("Deleted journal file", "path", journalFile)
			case os.IsNotExist(err):
				// Nothing to delete at this location.
			default:
				errs = append(errs, fmt.Errorf("deleting journal file %s: %w", journalFile, err))
			}
		}
	}
	return errors.Join(errs...)
}
// archiveCheckNode implements "geth archive check-node". For the trie node
// identified by --owner and --path it logs, straight from the raw database
// (opened read-only), the state of the node itself, of every prefix of its
// path down to the root, and of each of its 16 possible direct children.
//
// A node is reported as MISSING (no blob), EXPIRED (first byte 0x00; offset
// and size are decoded when the blob is exactly 17 bytes) or PRESENT.
func archiveCheckNode(ctx *cli.Context) error {
	stack, _ := makeConfigNode(ctx)
	defer stack.Close()

	chaindb := utils.MakeChainDatabase(ctx, stack, true)
	defer chaindb.Close()

	var (
		ownerHex = ctx.String(archiveCheckNodeFlag.Name)
		pathHex  = ctx.String(archiveCheckPathFlag.Name)
	)
	if ownerHex == "" {
		return errors.New("--owner flag is required")
	}
	owner := common.HexToHash(ownerHex)

	// Each hex character of --path becomes one nibble: "08" → []byte{0, 8}.
	var path []byte
	for _, c := range pathHex {
		switch {
		case '0' <= c && c <= '9':
			path = append(path, byte(c-'0'))
		case 'a' <= c && c <= 'f':
			path = append(path, byte(c-'a')+10)
		case 'A' <= c && c <= 'F':
			path = append(path, byte(c-'A')+10)
		default:
			return fmt.Errorf("invalid hex char in path: %c", c)
		}
	}
	log.Info("Checking node in raw DB", "owner", owner, "path", fmt.Sprintf("%x", path))

	// fetch reads a node blob from the account trie (zero owner) or from the
	// owner's storage trie.
	fetch := func(p []byte) []byte {
		if owner == (common.Hash{}) {
			return rawdb.ReadAccountTrieNode(chaindb, p)
		}
		return rawdb.ReadStorageTrieNode(chaindb, owner, p)
	}

	// Walk from the target path up through every prefix to the root.
	for depth := len(path); depth >= 0; depth-- {
		prefix := path[:depth]
		blob := fetch(prefix)

		status, details := "MISSING", ""
		switch {
		case len(blob) == 0:
			// Keep the MISSING default.
		case blob[0] != 0x00:
			status = fmt.Sprintf("PRESENT (%d bytes, first=0x%02x)", len(blob), blob[0])
		default:
			status = "EXPIRED"
			if len(blob) == 17 {
				details = fmt.Sprintf("offset=%d size=%d",
					binary.BigEndian.Uint64(blob[1:9]),
					binary.BigEndian.Uint64(blob[9:17]))
			}
		}

		// ROOT takes precedence over TARGET when the target path is empty.
		var label string
		switch depth {
		case 0:
			label = "ROOT"
		case len(path):
			label = "TARGET"
		default:
			label = "prefix"
		}
		log.Info("Node check",
			"label", label,
			"path", fmt.Sprintf("%x", prefix),
			"pathLen", depth,
			"status", status,
			"details", details)
	}

	// Report every direct child of the target that exists in the raw DB.
	for nibble := byte(0); nibble < 16; nibble++ {
		childPath := make([]byte, len(path)+1)
		copy(childPath, path)
		childPath[len(path)] = nibble

		blob := fetch(childPath)
		if len(blob) == 0 {
			continue
		}
		var status string
		if blob[0] == 0x00 && len(blob) == 17 {
			status = fmt.Sprintf("EXPIRED offset=%d size=%d",
				binary.BigEndian.Uint64(blob[1:9]),
				binary.BigEndian.Uint64(blob[9:17]))
		} else {
			status = fmt.Sprintf("PRESENT (%d bytes, first=0x%02x)", len(blob), blob[0])
		}
		log.Info("Child node", "path", fmt.Sprintf("%x", childPath), "status", status)
	}
	return nil
}
// cycleCheckScheme returns the state scheme recorded for db, exactly as
// reported by rawdb.ReadStateScheme. The ctx parameter is accepted but not
// used.
func cycleCheckScheme(ctx *cli.Context, db ethdb.Database) string {
return rawdb.ReadStateScheme(db)
}

View file

@ -240,6 +240,8 @@ func init() {
dumpConfigCommand,
// see dbcmd.go
dbCommand,
// See archivecmd.go
archiveCommand,
// See cmd/utils/flags_legacy.go
utils.ShowDeprecated,
// See snapshot.go

View file

@ -38,6 +38,7 @@ import (
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/p2p"
"github.com/ethereum/go-ethereum/rpc"
"github.com/ethereum/go-ethereum/trie/archive"
"github.com/gofrs/flock"
)
@ -85,6 +86,7 @@ func New(conf *Config) (*Node, error) {
return nil, err
}
conf.DataDir = absdatadir
archive.ArchiveDataDir = absdatadir
}
if conf.Logger == nil {
conf.Logger = log.New()

95
trie/archive/archive.go Normal file
View file

@ -0,0 +1,95 @@
// Copyright 2026 go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package archive
import (
"bytes"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"github.com/ethereum/go-ethereum/rlp"
)
// ResolverFn is the callback signature used to resolve an expired node from an
// archive file. Given the offset and size stored in the expired node, it
// returns the records found in that region of the archive.
// ArchivedNodeResolver in this package has this signature.
type ResolverFn func(offset, size uint64) ([]*Record, error)
// OffsetSize is the size of the file offset in bytes.
const OffsetSize = 8
var (
// NOTE(review): EmptyArchiveRecord does not follow the ErrXxx naming used by
// ErrNoResolver below; renaming it would break existing references.
EmptyArchiveRecord = errors.New("empty record") // The archive contained a size-zero record.
ErrNoResolver = errors.New("no archive resolver set for expired node") // An expired node is accessed without a resolver.
)
// Record is a single entry of the archive file. Records are RLP-encoded when
// written and RLP-decoded when read back. The structure is not optimal for
// in-place updates: any modification requires the record to be rewritten.
type Record struct {
Path []byte // presumably the leaf path relative to the archived subtree root — confirm against the archiver
Value []byte // presumably the encoded leaf node — confirm against the archiver
}
// ArchiveDataDir is the base data directory used to locate the archive file:
// ArchivedNodeResolver opens <ArchiveDataDir>/geth/nodearchive. It is a
// package-level variable that callers set before resolving nodes.
var ArchiveDataDir string
// ArchivedNodeResolver reads the region [offset, offset+size) of the archive
// file at <ArchiveDataDir>/geth/nodearchive and decodes it as a sequence of
// RLP-encoded records, which is a list of serialized leaf nodes. The caller
// knows the context (MPT, binary trie) and is responsible for decoding the
// nodes.
//
// A zero size yields no records and a nil error. An error is returned if the
// file cannot be opened, the region cannot be read in full, or the data is
// not a valid sequence of records.
func ArchivedNodeResolver(offset, size uint64) ([]*Record, error) {
	file, err := os.Open(filepath.Join(ArchiveDataDir, "geth", "nodearchive"))
	if err != nil {
		return nil, fmt.Errorf("error opening archive file: %w", err)
	}
	defer file.Close()

	pos, err := file.Seek(int64(offset), io.SeekStart)
	if err != nil {
		return nil, fmt.Errorf("error seeking into archive file: %w", err)
	}
	if uint64(pos) != offset {
		return nil, fmt.Errorf("invalid offset: want %d, got %d", offset, pos)
	}
	data := make([]byte, size)
	if _, err := io.ReadFull(file, data); err != nil {
		return nil, fmt.Errorf("error reading data from archive: %w", err)
	}
	if len(data) == 0 {
		return nil, nil
	}
	var (
		records []*Record
		stream  = rlp.NewStream(bytes.NewReader(data), uint64(len(data)))
	)
	for {
		// Kind is only consulted to detect the end of the input; the
		// record itself is decoded below. Its results must not shadow the
		// size parameter, which is reported in the error message.
		if _, _, err := stream.Kind(); errors.Is(err, io.EOF) {
			break
		} else if err != nil {
			return nil, fmt.Errorf("error getting rlp kind from archive data: %w", err)
		}
		record := new(Record)
		if err := stream.Decode(record); err != nil {
			return nil, fmt.Errorf("error decoding rlp record from archive data (offset=%d, size=%d): %w", offset, size, err)
		}
		records = append(records, record)
	}
	return records, nil
}

92
trie/archive/writer.go Normal file
View file

@ -0,0 +1,92 @@
// Copyright 2026 go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package archive
import (
"os"
"sync"
"github.com/ethereum/go-ethereum/rlp"
)
// ArchiveWriter is an append-only writer for archive files.
// It writes RLP-encoded records to a file and tracks the current offset.
type ArchiveWriter struct {
file *os.File // archive file, opened for appending by NewArchiveWriter
offset uint64 // offset at which the next write starts; initialised to the file size at open
mu sync.Mutex // held by WriteSubtree and Offset while they touch offset
}
// NewArchiveWriter opens the archive file at path for appending, creating it
// with mode 0644 when it does not exist yet. The writer's offset starts at the
// file's current size, so new subtrees are placed after any existing content.
func NewArchiveWriter(path string) (*ArchiveWriter, error) {
	const flags = os.O_APPEND | os.O_CREATE | os.O_WRONLY

	f, err := os.OpenFile(path, flags, 0644)
	if err != nil {
		return nil, err
	}
	stat, statErr := f.Stat()
	if statErr != nil {
		f.Close()
		return nil, statErr
	}
	w := new(ArchiveWriter)
	w.file = f
	w.offset = uint64(stat.Size())
	return w, nil
}
// WriteSubtree writes all records belonging to a subtree and returns
// the starting offset and total size of the written data.
// This is the atomic unit of archival - all records for a subtree are
// written together and can be retrieved together using the returned
// offset and size.
//
// All records are encoded before anything is written, so an encoding failure
// leaves the file untouched. The encoded subtree is then appended with a
// single write. If that write fails, the bytes that did reach the file are
// still added to the tracked offset, so offsets returned by later calls keep
// matching the real end of the file.
func (w *ArchiveWriter) WriteSubtree(records []*Record) (offset uint64, size uint64, err error) {
	var buf []byte
	for _, rec := range records {
		encoded, encErr := rlp.EncodeToBytes(rec)
		if encErr != nil {
			return 0, 0, encErr
		}
		buf = append(buf, encoded...)
	}

	w.mu.Lock()
	defer w.mu.Unlock()

	start := w.offset
	if len(buf) == 0 {
		return start, 0, nil
	}
	n, writeErr := w.file.Write(buf)
	w.offset += uint64(n)
	if writeErr != nil {
		return 0, 0, writeErr
	}
	return start, w.offset - start, nil
}
// Sync flushes the file to disk by calling Sync on the underlying file and
// returns its error. This should be called after writing a subtree and before
// modifying the database to ensure crash consistency.
// Unlike WriteSubtree and Offset, it does not take the writer's mutex.
func (w *ArchiveWriter) Sync() error {
return w.file.Sync()
}
// Close closes the archive file and returns the error from the underlying
// file's Close. It does not call Sync first and does not take the writer's
// mutex.
func (w *ArchiveWriter) Close() error {
return w.file.Close()
}
// Offset reports the writer's current offset, i.e. the position at which the
// next subtree will start. The value is read under the writer's mutex.
func (w *ArchiveWriter) Offset() uint64 {
	w.mu.Lock()
	current := w.offset
	w.mu.Unlock()
	return current
}

599
trie/archiver.go Normal file
View file

@ -0,0 +1,599 @@
// Copyright 2026 go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package trie
import (
"encoding/binary"
"fmt"
"time"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/trie/archive"
"github.com/ethereum/go-ethereum/triedb/database"
)
// subtreeInfo holds information about a subtree to be archived.
// It contains all the data needed to write the subtree to an archive
// and replace it with an expiredNode in the database.
// Instances are built by collectSubtree and handed to archiveSubtree.
type subtreeInfo struct {
path []byte // Hex-encoded path to subtree root (a private copy of the iterator path)
owner common.Hash // Zero for account trie, account hash for storage
height int // Height of subtree (from leaves), as reported by collectNodeLeaves
leaves []*archive.Record // All leaf records (relative path + encoded node)
nodePaths [][]byte // Paths of all nodes to delete; the subtree root's path comes first
rootHash common.Hash // Hash of the original subtree root (for verification)
}
// Archiver handles the archival process of trie nodes.
// It walks the state trie, identifies subtrees at height 3,
// archives their leaf data, and replaces them with expiredNode markers.
//
// The archiver uses a streaming approach: it walks the trie using a
// NodeIterator, probes each node's height via bounded raw DB reads,
// and archives subtrees immediately when found. This keeps memory
// usage proportional to the iterator stack depth + the current subtree
// being processed, rather than loading the entire trie into memory.
type Archiver struct {
db ethdb.Database // underlying key-value database
triedb database.NodeDatabase // node database used to open the account and storage tries
writer *archive.ArchiveWriter // archive file writer; may be nil for a dry run
compactionInterval uint64 // run compaction after this many subtrees (0 = disable)
dryRun bool // if true, don't modify the database
stateRoot common.Hash // root passed to the most recent ProcessState call
// Progress tracking
subtreesArchived uint64 // incremented once per successfully archived subtree
bytesDeleted uint64 // presumably the total size of removed node blobs; updated outside this view — confirm
leavesArchived uint64 // total number of leaf records in archived subtrees
lastCompaction uint64 // presumably the subtree count at the last compaction; updated outside this view — confirm
}
// NewArchiver builds an Archiver from its collaborators. All progress counters
// start at zero and the state root is set later by ProcessState.
//
// Parameters:
//   - db: the underlying key-value database
//   - triedb: the trie database used for reading nodes
//   - writer: archive file writer (may be nil for a dry run)
//   - compactionInterval: run compaction after this many subtrees (0 = disable)
//   - dryRun: if true, the database is not modified
func NewArchiver(db ethdb.Database, triedb database.NodeDatabase,
	writer *archive.ArchiveWriter, compactionInterval uint64, dryRun bool) *Archiver {
	a := new(Archiver)
	a.db = db
	a.triedb = triedb
	a.writer = writer
	a.compactionInterval = compactionInterval
	a.dryRun = dryRun
	return a
}
// ProcessState runs the archiver over the state identified by root. Every
// storage trie reachable from the account trie is handled first, and the
// account trie itself is handled last.
//
// Problems confined to a single account (undecodable account, storage trie
// that cannot be opened or processed) are logged and skipped. Failing to open
// or iterate the account trie, or to process it, aborts with an error.
func (a *Archiver) ProcessState(root common.Hash) error {
	a.stateRoot = root

	accountTrie, err := New(StateTrieID(root), a.triedb)
	if err != nil {
		return fmt.Errorf("failed to open account trie: %w", err)
	}

	log.Info("Processing storage tries")
	nodeIt, err := accountTrie.NodeIterator(nil)
	if err != nil {
		return fmt.Errorf("failed to create account iterator: %w", err)
	}
	accounts := NewIterator(nodeIt)
	for accounts.Next() {
		// Only accounts that decode cleanly and own storage are of interest.
		account := new(types.StateAccount)
		if decodeErr := rlp.DecodeBytes(accounts.Value, account); decodeErr != nil {
			log.Warn("Failed to decode account", "err", decodeErr)
			continue
		}
		if account.Root == types.EmptyRootHash {
			continue
		}
		owner := common.BytesToHash(accounts.Key)
		storageTrie, openErr := New(StorageTrieID(root, owner, account.Root), a.triedb)
		if openErr != nil {
			log.Warn("Failed to open storage trie", "account", owner, "err", openErr)
			continue
		}
		if processErr := a.processTrie(owner, storageTrie); processErr != nil {
			log.Warn("Failed to process storage trie", "account", owner, "err", processErr)
		}
	}
	if iterErr := accounts.Err; iterErr != nil {
		return fmt.Errorf("account iteration error: %w", iterErr)
	}

	log.Info("Processing account trie", "root", root)
	if processErr := a.processTrie(common.Hash{}, accountTrie); processErr != nil {
		return fmt.Errorf("failed to process account trie: %w", processErr)
	}
	return nil
}
// processTrie finds and archives all height-3 subtrees in the trie using
// a streaming approach. It walks the trie with a NodeIterator, probes each
// node's height via bounded raw DB reads, and archives subtrees immediately.
//
// Memory usage is O(iterator_stack_depth + current_subtree_size) instead of
// O(entire_trie) as with the previous recursive approach.
//
// owner is the zero hash for the account trie and the account hash for a
// storage trie. Failures to collect or archive an individual subtree are
// logged and skipped; only an iterator error is returned.
func (a *Archiver) processTrie(owner common.Hash, t *Trie) error {
	if t.root == nil {
		return nil
	}
	iter, err := t.NodeIterator(nil)
	if err != nil {
		return fmt.Errorf("failed to create node iterator: %w", err)
	}
	var (
		lastLog = time.Now()
		found   uint64
		// descend controls whether the next advance enters the children of
		// the current node. It is cleared right after a subtree has been
		// archived so that exactly one advance moves past that subtree.
		// Advancing a second time would leave the following node unexamined.
		descend = true
	)
	for iter.Next(descend) {
		descend = true
		if iter.Leaf() {
			continue
		}
		// Progress logging
		if time.Since(lastLog) > 30*time.Second {
			log.Info("Scanning trie for subtrees",
				"owner", owner,
				"path", common.Bytes2Hex(iter.Path()),
				"found", found,
				"archived", a.subtreesArchived)
			lastLog = time.Now()
		}
		path := copyBytes(iter.Path())
		hash := iter.Hash()
		if hash == (common.Hash{}) {
			// Embedded node (no hash), skip — it will be part of a
			// parent subtree.
			continue
		}
		// Probe subtree height via bounded raw DB reads.
		// This does NOT load the trie into memory — it reads blobs from
		// the DB, decodes them, computes height, and discards them.
		height := a.probeHeight(owner, path, hash, 3)
		if height != 3 {
			// Shorter than 3 (or already expired): nothing to archive at
			// this node. Taller than 3: keep descending, the height-3
			// subtrees are further down.
			continue
		}
		// height == 3: collect and archive this subtree immediately.
		info := a.collectSubtree(owner, path, hash)
		if info == nil {
			continue
		}
		found++
		if err := a.archiveSubtree(info); err != nil {
			log.Warn("Failed to archive subtree", "path", common.Bytes2Hex(path), "err", err)
			continue
		}
		a.subtreesArchived++
		a.leavesArchived += uint64(len(info.leaves))

		if err := a.maybeCompact(); err != nil {
			log.Warn("Compaction failed", "err", err)
		}
		// Skip children — they're now archived. The loop header performs
		// the single advance past the subtree.
		descend = false
	}
	if iter.Error() != nil {
		return fmt.Errorf("iterator error: %w", iter.Error())
	}
	log.Info("Found subtrees to archive", "owner", owner, "count", found)
	return nil
}
// probeHeight reads the node stored at (owner, path) from the raw DB and
// returns its height, where leaves have height 0, their parents 1, and so on.
// The search is bounded: once the height exceeds maxHeight, maxHeight+1 is
// returned. Decoded nodes are inspected and then discarded.
//
// Special results: 0 when the node is missing or cannot be decoded, and -1
// when the stored blob is already an expired-node marker.
func (a *Archiver) probeHeight(owner common.Hash, path []byte, hash common.Hash, maxHeight int) int {
	blob := a.readNodeBlob(owner, path)
	switch {
	case len(blob) == 0:
		return 0
	case blob[0] == expiredNodeMarker:
		// Already expired — skip.
		return -1
	}
	decoded, err := decodeNodeUnsafe(hash[:], blob)
	if err != nil {
		return 0
	}
	return a.nodeHeight(decoded, path, owner, maxHeight)
}
// nodeHeight computes the height of a decoded node, bounded by maxHeight.
// Height is measured from leaves: a value has height 0, a short node holding
// a value has height 1, and any other node is one higher than its tallest
// child. maxHeight+1 is returned as soon as the subtree is known to be taller
// than maxHeight.
//
// Expired nodes yield -1. A short node with an expired child yields -1 as
// well, while a full node ignores expired children and reports the height of
// the remaining ones.
func (a *Archiver) nodeHeight(n node, path []byte, owner common.Hash, maxHeight int) int {
	switch n := n.(type) {
	case nil:
		return 0
	case valueNode:
		return 0
	case *shortNode:
		switch child := n.Val.(type) {
		case valueNode:
			return 1 // shortNode → leaf
		case hashNode:
			// A hashed child has height >= 1, so this node has height >= 2.
			if maxHeight <= 1 {
				return maxHeight + 1
			}
			childPath := append(append([]byte{}, path...), n.Key...)
			childHeight := a.probeHeight(owner, childPath, common.BytesToHash(child), maxHeight-1)
			if childHeight < 0 {
				return -1 // expired child
			}
			return childHeight + 1
		default:
			// Inline node
			childPath := append(append([]byte{}, path...), n.Key...)
			childHeight := a.nodeHeight(child, childPath, owner, maxHeight-1)
			if childHeight < 0 {
				return -1
			}
			return childHeight + 1
		}
	case *fullNode:
		maxH := 0
		for i, child := range n.Children[:16] {
			if child == nil {
				continue
			}
			var childHeight int
			switch c := child.(type) {
			case valueNode:
				childHeight = 0
			case hashNode:
				// Same bound as the short-node case: a hashed child alone
				// makes this node at least height 2. The bound must depend
				// on maxHeight only, not on the running maximum — otherwise
				// a node that has already reached exactly maxHeight would be
				// reported as too tall whenever another hashed child follows.
				if maxHeight <= 1 {
					return maxHeight + 1
				}
				childPath := append(append([]byte{}, path...), byte(i))
				childHeight = a.probeHeight(owner, childPath, common.BytesToHash(c), maxHeight-1)
			default:
				childPath := append(append([]byte{}, path...), byte(i))
				childHeight = a.nodeHeight(c, childPath, owner, maxHeight-1)
			}
			if childHeight < 0 {
				continue // expired child, skip
			}
			maxH = max(maxH, childHeight+1)
			if maxH > maxHeight {
				return maxHeight + 1
			}
		}
		return maxH
	case hashNode:
		return a.probeHeight(owner, path, common.BytesToHash(n), maxHeight)
	case *expiredNode:
		return -1
	}
	return 0
}
// collectSubtree loads the subtree rooted at (owner, path) from the raw DB
// and gathers everything archiveSubtree needs: the leaf records, the paths of
// the nodes making up the subtree (root path first) and the measured height.
// hash is the expected hash of the root node and is stored as rootHash.
//
// It returns nil when there is nothing to archive: the root blob is missing,
// is already an expired-node marker, cannot be decoded, or one of its
// descendants cannot be collected. The last two cases are logged as warnings.
//
// Callers are expected to hand in subtrees of bounded height so the collected
// data stays small.
func (a *Archiver) collectSubtree(owner common.Hash, path []byte, hash common.Hash) *subtreeInfo {
	raw := a.readNodeBlob(owner, path)
	if len(raw) == 0 || raw[0] == expiredNodeMarker {
		return nil
	}
	root, err := decodeNodeUnsafe(hash[:], raw)
	if err != nil {
		log.Warn("Failed to decode node for collection", "path", common.Bytes2Hex(path), "err", err)
		return nil
	}
	leaves, descendants, height, err := a.collectNodeLeaves(root, path, nil, owner)
	if err != nil {
		log.Warn("Failed to collect subtree leaves", "path", common.Bytes2Hex(path), "err", err)
		return nil
	}
	// The root path always occupies index 0 of nodePaths.
	paths := make([][]byte, 0, len(descendants)+1)
	paths = append(paths, copyBytes(path))
	paths = append(paths, descendants...)

	return &subtreeInfo{
		path:      copyBytes(path),
		owner:     owner,
		rootHash:  hash,
		height:    height,
		leaves:    leaves,
		nodePaths: paths,
	}
}
// collectNodeLeaves recursively collects all leaves and node paths in a
// bounded subtree.
//
// Parameters:
//   - n: decoded node to walk; hashNode children are resolved from the raw DB.
//   - absPath: nibble path of n from the trie root, used for DB lookups and
//     recorded in the returned node paths.
//   - relPath: nibble path of n relative to the subtree root, used as the
//     prefix of the returned leaf record paths.
//   - owner: trie owner, forwarded to resolveRawNode.
//
// Returns (leaves, nodePaths, height, error):
//   - leaves: one record per value found, with Path relative to the subtree
//     root.
//   - nodePaths: absolute paths of every shortNode and fullNode visited,
//     including n itself, with a node always listed before its descendants.
//   - height: 0 for values, otherwise one more than the tallest child.
//   - error: set when a referenced child cannot be read or decoded.
//
// Expired descendants contribute no leaves and no paths; they are silently
// skipped here.
func (a *Archiver) collectNodeLeaves(n node, absPath, relPath []byte, owner common.Hash) ([]*archive.Record, [][]byte, int, error) {
	switch n := n.(type) {
	case nil:
		return nil, nil, 0, nil

	case valueNode:
		return []*archive.Record{{
			Path:  copyBytes(relPath),
			Value: []byte(n),
		}}, nil, 0, nil

	case *shortNode:
		// Copy before appending so the caller's slice is never modified.
		childAbsPath := append(append([]byte{}, absPath...), n.Key...)
		var childNode node
		switch c := n.Val.(type) {
		case hashNode:
			resolved, err := a.resolveRawNode(owner, childAbsPath, common.BytesToHash(c))
			if err != nil {
				return nil, nil, 0, fmt.Errorf("resolve shortNode child at %s: %w", common.Bytes2Hex(childAbsPath), err)
			}
			childNode = resolved
		default:
			childNode = c
		}
		// Pass nil relPath to child — we prepend the key ourselves
		leaves, nodePaths, height, err := a.collectNodeLeaves(childNode, childAbsPath, nil, owner)
		if err != nil {
			return nil, nil, 0, err
		}
		// Prepend [relPath + extension key] to leaf relative paths
		prefix := append(append([]byte{}, relPath...), n.Key...)
		for _, leaf := range leaves {
			leaf.Path = append(append([]byte{}, prefix...), leaf.Path...)
		}
		return leaves, append([][]byte{copyBytes(absPath)}, nodePaths...), height + 1, nil

	case *fullNode:
		var (
			allLeaves []*archive.Record
			// Record the branch node's own path, exactly like the shortNode
			// case does. Without it, branch nodes below the subtree root are
			// never scheduled for deletion and stay behind in the database.
			allPaths  = [][]byte{copyBytes(absPath)}
			maxHeight int
		)
		// Only the 16 nibble-indexed children are walked.
		for i, child := range n.Children[:16] {
			if child == nil {
				continue
			}
			childAbsPath := append(append([]byte{}, absPath...), byte(i))
			var childNode node
			switch c := child.(type) {
			case hashNode:
				resolved, err := a.resolveRawNode(owner, childAbsPath, common.BytesToHash(c))
				if err != nil {
					return nil, nil, 0, fmt.Errorf("resolve fullNode child[%x] at %s: %w", i, common.Bytes2Hex(childAbsPath), err)
				}
				childNode = resolved
			default:
				childNode = c
			}
			// Pass nil relPath to child — we prepend the index ourselves
			leaves, nodePaths, height, err := a.collectNodeLeaves(childNode, childAbsPath, nil, owner)
			if err != nil {
				return nil, nil, 0, err
			}
			// Prepend [relPath + branch index] to leaf relative paths
			prefix := append(append([]byte{}, relPath...), byte(i))
			for _, leaf := range leaves {
				leaf.Path = append(append([]byte{}, prefix...), leaf.Path...)
			}
			allLeaves = append(allLeaves, leaves...)
			allPaths = append(allPaths, nodePaths...)
			if h := height + 1; h > maxHeight {
				maxHeight = h
			}
		}
		return allLeaves, allPaths, maxHeight, nil

	case hashNode:
		// A bare reference: load the node it points to and walk that instead,
		// keeping both paths unchanged.
		resolved, err := a.resolveRawNode(owner, absPath, common.BytesToHash(n))
		if err != nil {
			return nil, nil, 0, err
		}
		return a.collectNodeLeaves(resolved, absPath, relPath, owner)

	case *expiredNode:
		return nil, nil, 0, nil
	}
	return nil, nil, 0, nil
}
// readNodeBlob fetches the stored blob of the trie node at path directly from
// the archiver's key-value database. A zero owner selects the account trie;
// any other owner selects that owner's storage trie.
func (a *Archiver) readNodeBlob(owner common.Hash, path []byte) []byte {
	if owner != (common.Hash{}) {
		return rawdb.ReadStorageTrieNode(a.db, owner, path)
	}
	return rawdb.ReadAccountTrieNode(a.db, path)
}
// resolveRawNode loads the node stored at (owner, path) via readNodeBlob and
// decodes it, passing hash to the decoder.
//
// It returns an error when no blob is stored at the path, an empty
// *expiredNode when the blob starts with the expired-node marker, and the
// decoder's result otherwise.
func (a *Archiver) resolveRawNode(owner common.Hash, path []byte, hash common.Hash) (node, error) {
	raw := a.readNodeBlob(owner, path)
	switch {
	case len(raw) == 0:
		return nil, fmt.Errorf("node not found: owner=%s path=%s", owner, common.Bytes2Hex(path))
	case raw[0] == expiredNodeMarker:
		return &expiredNode{}, nil
	default:
		return decodeNodeUnsafe(hash[:], raw)
	}
}
// archiveSubtree writes the subtree's leaves to the archive file and then
// replaces the subtree in the database with a single expired-node marker at
// its root path.
//
// Steps, in order:
//  1. append the leaf records to the archive and remember (offset, size),
//  2. sync the archive so the data is durable before the DB is touched,
//  3. rebuild the subtree from the records and compare its hash against
//     info.rootHash (skipped when rootHash is the zero hash),
//  4. in one batch, delete every collected node path except the first and
//     write the expired-node blob at info.path.
//
// In dry-run mode nothing is written; the subtree is only logged.
//
// Returns an error if any step fails. The bytesDeleted counter is only
// advanced once the batch has been written successfully; it counts the bytes
// of the deleted key paths, not of the node blobs.
func (a *Archiver) archiveSubtree(info *subtreeInfo) error {
	if a.dryRun {
		log.Info("Would archive subtree",
			"path", common.Bytes2Hex(info.path),
			"owner", info.owner,
			"height", info.height,
			"leaves", len(info.leaves),
			"nodes", len(info.nodePaths))
		return nil
	}
	// 1. Write to archive file
	offset, size, err := a.writer.WriteSubtree(info.leaves)
	if err != nil {
		return fmt.Errorf("failed to write subtree to archive: %w", err)
	}
	// 2. Sync to ensure durability before modifying DB
	if err := a.writer.Sync(); err != nil {
		return fmt.Errorf("failed to sync archive: %w", err)
	}
	// 3. Verify archive round-trip: reconstruct trie from records and
	//    check that the hash matches the original subtree root. This
	//    catches any data corruption before we delete the original nodes.
	if info.rootHash != (common.Hash{}) {
		reconstructed, err := archiveRecordsToNode(info.leaves)
		if err != nil {
			return fmt.Errorf("archive verification failed: cannot reconstruct trie from records: %w", err)
		}
		h := newHasher(false)
		gotHash := common.BytesToHash(h.hash(reconstructed, true))
		returnHasherToPool(h)
		if gotHash != info.rootHash {
			return fmt.Errorf("archive verification failed: hash mismatch at path %s owner %s: got %s want %s (leaves=%d offset=%d size=%d)",
				common.Bytes2Hex(info.path), info.owner, gotHash, info.rootHash,
				len(info.leaves), offset, size)
		}
	}
	// 4. Batch database operations
	batch := a.db.NewBatch()
	isAccountTrie := info.owner == (common.Hash{})

	// Everything after the first entry is deleted; the first entry is the
	// root, which is overwritten below. Guard the slice expression so an
	// empty nodePaths cannot panic.
	var descendants [][]byte
	if len(info.nodePaths) > 1 {
		descendants = info.nodePaths[1:]
	}
	var deleted uint64
	for _, nodePath := range descendants {
		if isAccountTrie {
			rawdb.DeleteAccountTrieNode(batch, nodePath)
		} else {
			rawdb.DeleteStorageTrieNode(batch, info.owner, nodePath)
		}
		deleted += uint64(len(nodePath))
	}
	// Write expiredNode at subtree root
	expiredBlob := encodeExpiredNodeBlob(offset, size)
	if isAccountTrie {
		rawdb.WriteAccountTrieNode(batch, info.path, expiredBlob)
	} else {
		rawdb.WriteStorageTrieNode(batch, info.owner, info.path, expiredBlob)
	}
	if err := batch.Write(); err != nil {
		return fmt.Errorf("failed to write batch: %w", err)
	}
	// Only account for the deletions once they have actually been committed.
	a.bytesDeleted += deleted

	log.Debug("Archived subtree",
		"path", common.Bytes2Hex(info.path),
		"owner", info.owner,
		"leaves", len(info.leaves),
		"offset", offset,
		"size", size)
	return nil
}
// maybeCompact triggers a full-range database compaction once at least
// compactionInterval subtrees have been archived since the previous one.
// An interval of zero turns compaction off. On success the current archived
// count becomes the new baseline; a compaction error is returned unchanged.
func (a *Archiver) maybeCompact() error {
	interval := a.compactionInterval
	if interval == 0 || a.subtreesArchived-a.lastCompaction < interval {
		return nil
	}
	log.Info("Running database compaction", "subtrees", a.subtreesArchived)
	if err := a.db.Compact(nil, nil); err != nil {
		return err
	}
	a.lastCompaction = a.subtreesArchived
	return nil
}
// encodeExpiredNodeBlob builds the database representation of an expired
// node: the marker byte followed by the big-endian archive offset and the
// big-endian archive size.
func encodeExpiredNodeBlob(offset, size uint64) []byte {
	blob := make([]byte, 1+2*archive.OffsetSize)
	blob[0] = expiredNodeMarker

	offsetField := blob[1:]
	binary.BigEndian.PutUint64(offsetField, offset)

	sizeField := blob[1+archive.OffsetSize:]
	binary.BigEndian.PutUint64(sizeField, size)
	return blob
}
// Stats reports the archiver's running counters: the number of subtrees
// archived, the number of leaves archived, and the deleted-bytes counter.
func (a *Archiver) Stats() (subtrees, leaves, bytesDeleted uint64) {
	subtrees = a.subtreesArchived
	leaves = a.leavesArchived
	bytesDeleted = a.bytesDeleted
	return subtrees, leaves, bytesDeleted
}
// copyBytes returns an independent copy of b. A nil input yields nil; an
// empty non-nil input yields an empty non-nil slice.
func copyBytes(b []byte) []byte {
	if b == nil {
		return nil
	}
	return append(make([]byte, 0, len(b)), b...)
}

View file

@ -79,6 +79,8 @@ func (c *committer) commit(path []byte, n node, parallel bool) node {
return cn
case hashNode:
return cn
case *expiredNode:
return cn
default:
// nil, valuenode shouldn't be committed
panic(fmt.Sprintf("%T: invalid node: %v", n, n))

262
trie/expired_node.go Normal file
View file

@ -0,0 +1,262 @@
// Copyright 2026 go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package trie
import (
"bytes"
"encoding/binary"
"fmt"
"time"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/trie/archive"
)
// expiredNodeMarker is the first byte of every stored expired-node blob and
// is what distinguishes such a blob from a regular trie node.
// Using 0x00 as a marker since valid MPT nodes are always RLP lists (starting with 0xc0+).
const expiredNodeMarker = 0x00
// expiredNode represents a node whose data has been archived.
// It stores the file offset and size of the archived data.
type expiredNode struct {
// offset is the position of the archived subtree inside the archive file.
offset uint64
// size is the length of the archived subtree data.
size uint64
// cachedHash is the hash of the original subtree, when known. It is nil
// for nodes constructed without one.
cachedHash hashNode
// archiveResolver is an optional per-node resolver installed through
// SetArchiveResolver.
archiveResolver archive.ResolverFn
}
// cache returns the cached subtree hash together with a flag that is true
// exactly when no hash is cached.
func (n *expiredNode) cache() (hashNode, bool) {
	if n.cachedHash == nil {
		return n.cachedHash, true
	}
	return n.cachedHash, false
}
// encode writes the node's stored form into w: the expired-node marker byte
// followed by the big-endian offset and the big-endian size.
func (n *expiredNode) encode(w rlp.EncoderBuffer) {
	var blob [1 + 2*archive.OffsetSize]byte
	raw := blob[:]
	raw[0] = expiredNodeMarker
	binary.BigEndian.PutUint64(raw[1:], n.offset)
	binary.BigEndian.PutUint64(raw[1+archive.OffsetSize:], n.size)
	w.Write(raw)
}
// fstring renders the node for debugging output. The indentation argument is
// not used because an expired node has no children to print.
func (n *expiredNode) fstring(ind string) string {
	const layout = "<expired: offset=%d, size=%d> "
	return fmt.Sprintf(layout, n.offset, n.size)
}
// Offset returns the archive file offset for this expired node, i.e. the
// value of its offset field.
func (n *expiredNode) Offset() uint64 {
return n.offset
}
// SetArchiveResolver sets the resolver function for this expired node.
// It only stores resolver in the archiveResolver field.
// NOTE(review): resolveExpiredNodeData in this file calls
// archive.ArchivedNodeResolver directly and does not read this field —
// confirm whether the stored resolver is consumed elsewhere.
func (n *expiredNode) SetArchiveResolver(resolver archive.ResolverFn) {
n.archiveResolver = resolver
}
// resolveExpiredNodeData resolves an expired node from the archive, verifies
// the reconstructed subtree hash, and stamps the cached hash onto the root.
// Returns an error if the archive data is corrupted (hash mismatch).
//
// The records are fetched with archive.ArchivedNodeResolver using the node's
// offset and size, then rebuilt into a trie with archiveRecordsToNode. Errors
// from either step are wrapped and returned.
//
// The hash check only runs when n.cachedHash is set; without it the rebuilt
// subtree is returned unverified.
//
// NOTE(review): n.archiveResolver is not consulted here — confirm whether a
// resolver installed via SetArchiveResolver is meant to take precedence.
func resolveExpiredNodeData(n *expiredNode) (node, error) {
start := time.Now()
records, err := archive.ArchivedNodeResolver(n.offset, n.size)
if err != nil {
return nil, fmt.Errorf("failed to resolve expired node: %w", err)
}
resolved, err := archiveRecordsToNode(records)
if err != nil {
return nil, fmt.Errorf("failed to rebuild expired node from archive: %w", err)
}
// depth is computed for the debug log line only.
depth := subtreeDepth(resolved)
log.Debug("Resurrected expired node from archive",
"offset", n.offset, "archiveBytes", n.size,
"records", len(records), "depth", depth,
"elapsed", time.Since(start))
// Verify hash integrity: if the original hash is known, check that the
// reconstructed subtree produces the same hash. A mismatch means the
// archive is corrupted (e.g. missing leaves due to unresolvable hashNodes
// during archival) and any data from it is unreliable.
if n.cachedHash != nil {
h := newHasher(false)
gotHash := h.hash(resolved, true)
returnHasherToPool(h)
if !bytes.Equal(gotHash, n.cachedHash) {
return nil, fmt.Errorf("expired node hash mismatch at offset=%d size=%d: archive data is corrupted (expected %x got %x, %d records)",
n.offset, n.size, []byte(n.cachedHash), gotHash, len(records))
}
// Stamp the original hash onto the resolved subtree root so the
// hasher returns it directly instead of re-computing.
switch nn := resolved.(type) {
case *fullNode:
nn.flags.hash = n.cachedHash
case *shortNode:
nn.flags.hash = n.cachedHash
}
}
// Mark the entire resolved subtree as dirty. This is critical for
// correctness with pathdb's diff layer model: when a trie with expired
// nodes is modified and committed, the committer only captures dirty
// nodes into the NodeSet (which becomes the diff layer). Without this
// marking, resolved-but-unmodified sibling nodes within the subtree
// would exist nowhere — not in any diff layer (they're clean) and not
// in the raw DB (the archiver deleted them). Subsequent trie accesses
// from higher diff layers would fall through to the disk layer, find
// nothing, and produce MissingNodeError.
//
// For read-only tries (only get operations, no commit), this dirty
// marking is harmless — the nodes are discarded when the trie is GC'd.
markSubtreeDirty(resolved)
return resolved, nil
}
// subtreeDepth reports how many shortNode/fullNode levels lie on the longest
// path below and including n. Any other node kind (values, hashes, nil)
// counts as depth 0.
func subtreeDepth(n node) int {
	switch cur := n.(type) {
	case *shortNode:
		return subtreeDepth(cur.Val) + 1
	case *fullNode:
		deepest := 0
		for i := range cur.Children {
			child := cur.Children[i]
			if child == nil {
				continue
			}
			if d := subtreeDepth(child); d > deepest {
				deepest = d
			}
		}
		return deepest + 1
	}
	return 0
}
// markSubtreeDirty walks the subtree rooted at n and sets the dirty flag on
// every shortNode and fullNode it reaches. Cached hashes are left untouched.
// For a fullNode only the 16 nibble-indexed children are followed. Node kinds
// without flags (values, hashes, nil) are ignored.
func markSubtreeDirty(n node) {
	switch cur := n.(type) {
	case *shortNode:
		cur.flags.dirty = true
		markSubtreeDirty(cur.Val)
	case *fullNode:
		cur.flags.dirty = true
		for i := 0; i < 16; i++ {
			if child := cur.Children[i]; child != nil {
				markSubtreeDirty(child)
			}
		}
	}
}
// archiveRecordsToNode rebuilds a trie subtree from archived leaf records by
// inserting them one at a time into an initially empty root. Starting from
// nil rather than a pre-made branch node lets the insertion logic decide
// whether the root ends up as a shortNode or a fullNode.
//
// It returns archive.EmptyArchiveRecord for an empty record list, and the
// first validation, normalization or insertion error otherwise.
func archiveRecordsToNode(records []*archive.Record) (node, error) {
	if len(records) == 0 {
		return nil, archive.EmptyArchiveRecord
	}
	var root node
	for idx := range records {
		rec := records[idx]
		if err := validateRecordPath(rec.Path); err != nil {
			return nil, err
		}
		key, err := normalizeRecordKey(rec.Path)
		if err != nil {
			return nil, err
		}
		if len(key) < 1 {
			return nil, fmt.Errorf("empty key in record #%d", idx)
		}
		next, err := insertTrieNode(root, key, valueNode(rec.Value))
		if err != nil {
			return nil, err
		}
		root = next
	}
	return root, nil
}
// validateRecordPath checks that path is a well-formed nibble path: every
// element is at most 16, and the value 16 (the terminator) appears only as
// the final element. The first offending element determines the error.
func validateRecordPath(path []byte) error {
	last := len(path) - 1
	for idx := 0; idx < len(path); idx++ {
		switch nib := path[idx]; {
		case nib > 16:
			return fmt.Errorf("invalid nibble in record path: %d", nib)
		case nib == 16 && idx != last:
			return fmt.Errorf("terminator nibble in middle of record path")
		}
	}
	return nil
}
// normalizeRecordKey turns a record path into a key that ends with the
// terminator nibble (16):
//   - an empty path becomes a key holding only the terminator,
//   - a path that hasTerm reports as terminated is returned as-is (same
//     backing array, no copy),
//   - any other path is copied and the terminator is appended to the copy.
//
// The returned error is always nil.
func normalizeRecordKey(path []byte) ([]byte, error) {
	switch {
	case len(path) == 0:
		return []byte{16}, nil
	case hasTerm(path):
		return path, nil
	}
	terminated := make([]byte, 0, len(path)+1)
	terminated = append(terminated, path...)
	terminated = append(terminated, 16)
	return terminated, nil
}
// insertTrieNode inserts value under key into the subtree rooted at n and
// returns the new subtree root. key is the remaining nibble path relative to
// n; once it is exhausted, value itself becomes the result (replacing
// whatever n was).
//
// shortNodes are never modified in place — a fresh shortNode is returned —
// whereas a fullNode has its child slot updated and is returned as-is.
// An error is returned when n is a node type that cannot take an insertion
// with a non-empty key (anything other than shortNode, fullNode or nil).
func insertTrieNode(n node, key []byte, value node) (node, error) {
if len(key) == 0 {
return value, nil
}
switch n := n.(type) {
case *shortNode:
matchlen := prefixLen(key, n.Key)
if matchlen == len(n.Key) {
// key runs through the whole short key: descend into the child.
nn, err := insertTrieNode(n.Val, key[matchlen:], value)
if err != nil {
return nil, err
}
return &shortNode{Key: n.Key, Val: nn}, nil
}
// The keys diverge at matchlen: create a branch holding the old child and
// the new value under their respective next nibbles.
// NOTE(review): key[matchlen] assumes key is longer than the common
// prefix — presumably guaranteed by terminated keys; confirm.
branch := &fullNode{}
var err error
branch.Children[n.Key[matchlen]], err = insertTrieNode(nil, n.Key[matchlen+1:], n.Val)
if err != nil {
return nil, err
}
branch.Children[key[matchlen]], err = insertTrieNode(nil, key[matchlen+1:], value)
if err != nil {
return nil, err
}
// No shared prefix: the branch replaces the short node outright.
if matchlen == 0 {
return branch, nil
}
// Otherwise keep the shared prefix as a short node leading to the branch.
return &shortNode{Key: key[:matchlen], Val: branch}, nil
case *fullNode:
child, err := insertTrieNode(n.Children[key[0]], key[1:], value)
if err != nil {
return nil, err
}
n.Children[key[0]] = child
return n, nil
case nil:
// Empty slot: store the whole remaining key in a single short node.
return &shortNode{Key: key, Val: value}, nil
default:
return nil, fmt.Errorf("invalid node type in trie insert: %T", n)
}
}

601
trie/expired_node_test.go Normal file
View file

@ -0,0 +1,601 @@
// Copyright 2026 go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package trie
import (
"bytes"
"errors"
"os"
"path/filepath"
"testing"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/trie/archive"
)
// setupTestArchive writes the given records as one subtree into a fresh
// archive file at <tmp>/geth/nodearchive and points the package-level
// archive.ArchiveDataDir at <tmp>. It returns the offset and size reported by
// WriteSubtree, and a cleanup function that restores the previous
// ArchiveDataDir value. The temporary directory itself is managed by
// t.TempDir.
//
// Any setup failure aborts the test via t.Fatal. The result of writer.Close
// is not checked.
// NOTE(review): because a package-level variable is swapped, tests using this
// helper presumably must not run in parallel — confirm.
func setupTestArchive(t *testing.T, records []*archive.Record) (offset, size uint64, cleanup func()) {
t.Helper()
tmpDir := t.TempDir()
gethDir := filepath.Join(tmpDir, "geth")
if err := os.MkdirAll(gethDir, 0755); err != nil {
t.Fatal(err)
}
writer, err := archive.NewArchiveWriter(filepath.Join(gethDir, "nodearchive"))
if err != nil {
t.Fatal(err)
}
offset, size, err = writer.WriteSubtree(records)
if err != nil {
writer.Close()
t.Fatal(err)
}
writer.Close()
oldDir := archive.ArchiveDataDir
archive.ArchiveDataDir = tmpDir
return offset, size, func() {
archive.ArchiveDataDir = oldDir
}
}
// TestExpiredNodeEncodeDecode round-trips expired nodes through encode and
// decodeNodeUnsafe for a range of offset/size values, including zero and the
// maximum uint64, and checks that both fields survive unchanged.
func TestExpiredNodeEncodeDecode(t *testing.T) {
testCases := []struct {
offset uint64
size uint64
}{
{0, 0},
{1, 100},
{255, 1024},
{256, 4096},
{1 << 16, 1 << 20},
{1 << 32, 1 << 32},
{1<<64 - 1, 1<<64 - 1},
}
for _, tc := range testCases {
original := &expiredNode{offset: tc.offset, size: tc.size}
w := rlp.NewEncoderBuffer(nil)
original.encode(w)
encoded := w.ToBytes()
w.Flush()
decoded, err := decodeNodeUnsafe(nil, encoded)
if err != nil {
t.Fatalf("failed to decode expired node with offset %d, size %d: %v", tc.offset, tc.size, err)
}
expNode, ok := decoded.(*expiredNode)
if !ok {
t.Fatalf("decoded node is not an expired node, got %T", decoded)
}
if expNode.offset != original.offset {
t.Errorf("offset mismatch: got %d, want %d", expNode.offset, original.offset)
}
if expNode.size != original.size {
t.Errorf("size mismatch: got %d, want %d", expNode.size, original.size)
}
}
}
// TestExpiredNodeEncodedFormat pins the exact byte layout produced by encode:
// the 0x00 marker, then the offset and the size, each as 8 big-endian bytes.
func TestExpiredNodeEncodedFormat(t *testing.T) {
node := &expiredNode{offset: 0x0102030405060708, size: 0x1112131415161718}
w := rlp.NewEncoderBuffer(nil)
node.encode(w)
encoded := w.ToBytes()
w.Flush()
expected := []byte{
0x00,
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
}
if !bytes.Equal(encoded, expected) {
t.Errorf("encoded format mismatch: got %x, want %x", encoded, expected)
}
}
// TestExpiredNodeFstring pins the debug string produced by fstring, including
// its trailing space.
func TestExpiredNodeFstring(t *testing.T) {
node := &expiredNode{offset: 12345, size: 6789}
s := node.fstring("")
if s != "<expired: offset=12345, size=6789> " {
t.Errorf("fstring mismatch: got %q", s)
}
}
// TestExpiredNodeCache checks that an expired node without a cached hash
// reports a nil hash and dirty=true from cache().
func TestExpiredNodeCache(t *testing.T) {
node := &expiredNode{offset: 100}
hash, dirty := node.cache()
if hash != nil {
t.Error("expected nil hash from expired node cache")
}
if !dirty {
t.Error("expected dirty=true from expired node cache")
}
}
// TestExpiredNodeInvalidLength feeds decodeNodeUnsafe marker-prefixed buffers
// of length 1, 2, 9, 16 and 18 — every one different from the 17 bytes that
// encode produces — and expects each to be rejected.
func TestExpiredNodeInvalidLength(t *testing.T) {
invalidCases := [][]byte{
{0x00},
{0x00, 0x01},
{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08},
{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f},
{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11},
}
for _, buf := range invalidCases {
_, err := decodeNodeUnsafe(nil, buf)
if err == nil {
t.Errorf("expected error for buffer length %d, got nil", len(buf))
}
}
}
// TestExpiredNodeNoArchiveFile points archive.ArchiveDataDir at a directory
// that contains an empty geth/ folder but no archive file, and checks that
// reading through an expired root fails instead of returning data.
func TestExpiredNodeNoArchiveFile(t *testing.T) {
// When no archive file exists, Get should return an error
tmpDir := t.TempDir()
gethDir := filepath.Join(tmpDir, "geth")
if err := os.MkdirAll(gethDir, 0755); err != nil {
t.Fatal(err)
}
// Swap the package-level data dir and restore it when the test ends.
oldDir := archive.ArchiveDataDir
archive.ArchiveDataDir = tmpDir
defer func() { archive.ArchiveDataDir = oldDir }()
tr := NewEmpty(nil)
tr.root = &expiredNode{offset: 100, size: 50}
_, err := tr.Get([]byte("key"))
if err == nil {
t.Error("expected error when archive file doesn't exist")
}
}
// TestExpiredNodeWithResolver archives a single record at nibble path
// [1, 2, terminator] and checks that Get on key 0x12 through an expired root
// returns the archived value.
func TestExpiredNodeWithResolver(t *testing.T) {
records := []*archive.Record{
{Path: []byte{0x01, 0x02, 16}, Value: []byte("testvalue")},
}
offset, size, cleanup := setupTestArchive(t, records)
defer cleanup()
tr := NewEmpty(nil)
tr.root = &expiredNode{offset: offset, size: size}
val, err := tr.Get([]byte{0x12})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if string(val) != "testvalue" {
t.Errorf("value mismatch: got %q, want %q", val, "testvalue")
}
}
// TestExpiredNodeCopy checks that copyNode on an expired node yields another
// expired node carrying the same offset and size and a non-nil resolver.
func TestExpiredNodeCopy(t *testing.T) {
original := &expiredNode{
offset: 12345,
size: 6789,
archiveResolver: archive.ArchivedNodeResolver,
}
copied := copyNode(original)
copiedExp, ok := copied.(*expiredNode)
if !ok {
t.Fatalf("copied node is not an expired node, got %T", copied)
}
if copiedExp.offset != original.offset {
t.Errorf("offset mismatch: got %d, want %d", copiedExp.offset, original.offset)
}
if copiedExp.size != original.size {
t.Errorf("size mismatch: got %d, want %d", copiedExp.size, original.size)
}
if copiedExp.archiveResolver == nil {
t.Error("archive resolver was not copied")
}
}
// TestArchiveRecordsToNodeEmpty checks that both an empty and a nil record
// slice are rejected with archive.EmptyArchiveRecord.
func TestArchiveRecordsToNodeEmpty(t *testing.T) {
_, err := archiveRecordsToNode([]*archive.Record{})
if !errors.Is(err, archive.EmptyArchiveRecord) {
t.Errorf("expected EmptyArchiveRecord error, got %v", err)
}
_, err = archiveRecordsToNode(nil)
if !errors.Is(err, archive.EmptyArchiveRecord) {
t.Errorf("expected EmptyArchiveRecord error for nil slice, got %v", err)
}
}
// TestArchiveRecordsToNodeMultiple rebuilds a subtree from two records whose
// paths differ in the first nibble and checks that the result is a fullNode
// with a child in each of the two corresponding slots.
func TestArchiveRecordsToNodeMultiple(t *testing.T) {
records := []*archive.Record{
{Path: []byte{0x01, 16}, Value: []byte("value1")},
{Path: []byte{0x02, 16}, Value: []byte("value2")},
}
node, err := archiveRecordsToNode(records)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
fn, ok := node.(*fullNode)
if !ok {
t.Fatalf("expected fullNode, got %T", node)
}
if fn.Children[0x01] == nil {
t.Error("expected child at index 0x01")
}
if fn.Children[0x02] == nil {
t.Error("expected child at index 0x02")
}
}
// TestExpiredNodeGetMultipleRecords archives two records and reads each of
// them back through its own trie whose root is the same expired node.
func TestExpiredNodeGetMultipleRecords(t *testing.T) {
records := []*archive.Record{
{Path: []byte{0x01, 0x02, 16}, Value: []byte("value1")},
{Path: []byte{0x04, 0x05, 16}, Value: []byte("value2")},
}
offset, size, cleanup := setupTestArchive(t, records)
defer cleanup()
tr := NewEmpty(nil)
tr.root = &expiredNode{offset: offset, size: size}
val, err := tr.Get([]byte{0x12})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if string(val) != "value1" {
t.Errorf("value mismatch: got %q, want %q", val, "value1")
}
// A second, independent trie starts from an unresolved expired root again.
tr2 := NewEmpty(nil)
tr2.root = &expiredNode{offset: offset, size: size}
val2, err := tr2.Get([]byte{0x45})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if string(val2) != "value2" {
t.Errorf("value mismatch: got %q, want %q", val2, "value2")
}
}
// TestExpiredNodeGetKeyNotFound checks that looking up a key absent from the
// archived subtree returns a nil value and no error.
func TestExpiredNodeGetKeyNotFound(t *testing.T) {
records := []*archive.Record{
{Path: []byte{0x01, 0x02, 16}, Value: []byte("value1")},
}
offset, size, cleanup := setupTestArchive(t, records)
defer cleanup()
tr := NewEmpty(nil)
tr.root = &expiredNode{offset: offset, size: size}
val, err := tr.Get([]byte{0xff, 0xff})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if val != nil {
t.Errorf("expected nil value for non-existent key, got %q", val)
}
}
// TestExpiredNodeGetPathMismatch looks up key 0x19, which shares only its
// first nibble with the archived record at [1, 2], and expects a nil value
// without an error.
func TestExpiredNodeGetPathMismatch(t *testing.T) {
records := []*archive.Record{
{Path: []byte{0x01, 0x02, 16}, Value: []byte("testvalue")},
}
offset, size, cleanup := setupTestArchive(t, records)
defer cleanup()
tr := NewEmpty(nil)
tr.root = &expiredNode{offset: offset, size: size}
val, err := tr.Get([]byte{0x19})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if val != nil {
t.Errorf("expected nil value when leaf key doesn't match, got %q", val)
}
}
// TestExpiredNodeInsert inserts a brand-new key into a trie whose root is an
// expired node and checks that the new value can be read back.
func TestExpiredNodeInsert(t *testing.T) {
records := []*archive.Record{
{Path: []byte{0x01, 0x02, 16}, Value: []byte("existing")},
}
offset, size, cleanup := setupTestArchive(t, records)
defer cleanup()
tr := NewEmpty(nil)
tr.root = &expiredNode{offset: offset, size: size}
err := tr.Update([]byte{0x45}, []byte("newvalue"))
if err != nil {
t.Fatalf("unexpected error on insert: %v", err)
}
val, err := tr.Get([]byte{0x45})
if err != nil {
t.Fatalf("unexpected error on get: %v", err)
}
if string(val) != "newvalue" {
t.Errorf("value mismatch: got %q, want %q", val, "newvalue")
}
}
// TestExpiredNodeUpdate overwrites the value of a key that already exists in
// the archived subtree and checks that the new value is returned afterwards.
func TestExpiredNodeUpdate(t *testing.T) {
records := []*archive.Record{
{Path: []byte{0x01, 0x02, 16}, Value: []byte("oldvalue")},
}
offset, size, cleanup := setupTestArchive(t, records)
defer cleanup()
tr := NewEmpty(nil)
tr.root = &expiredNode{offset: offset, size: size}
err := tr.Update([]byte{0x12}, []byte("newvalue"))
if err != nil {
t.Fatalf("unexpected error on update: %v", err)
}
val, err := tr.Get([]byte{0x12})
if err != nil {
t.Fatalf("unexpected error on get: %v", err)
}
if string(val) != "newvalue" {
t.Errorf("value mismatch: got %q, want %q", val, "newvalue")
}
}
// TestExpiredNodeDelete deletes one of two archived keys through an expired
// root and checks that the deleted key is gone while the other key is still
// readable.
func TestExpiredNodeDelete(t *testing.T) {
records := []*archive.Record{
{Path: []byte{0x01, 0x02, 16}, Value: []byte("value1")},
{Path: []byte{0x04, 0x05, 16}, Value: []byte("value2")},
}
offset, size, cleanup := setupTestArchive(t, records)
defer cleanup()
tr := NewEmpty(nil)
tr.root = &expiredNode{offset: offset, size: size}
err := tr.Delete([]byte{0x12})
if err != nil {
t.Fatalf("unexpected error on delete: %v", err)
}
val, err := tr.Get([]byte{0x12})
if err != nil {
t.Fatalf("unexpected error on get after delete: %v", err)
}
if val != nil {
t.Errorf("expected nil after delete, got %q", val)
}
// The sibling key must be unaffected by the deletion.
val2, err := tr.Get([]byte{0x45})
if err != nil {
t.Fatalf("unexpected error getting other key: %v", err)
}
if string(val2) != "value2" {
t.Errorf("other value should still exist: got %q, want %q", val2, "value2")
}
}
// TestTrieCopyPreservesArchiveResolver copies a trie whose root is an expired
// node and checks that the copy can still resolve the archived value.
func TestTrieCopyPreservesArchiveResolver(t *testing.T) {
records := []*archive.Record{
{Path: []byte{0x01, 0x02, 16}, Value: []byte("testvalue")},
}
offset, size, cleanup := setupTestArchive(t, records)
defer cleanup()
tr := NewEmpty(nil)
tr.root = &expiredNode{offset: offset, size: size}
trCopy := tr.Copy()
val, err := trCopy.Get([]byte{0x12})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if string(val) != "testvalue" {
t.Errorf("value mismatch: got %q, want %q", val, "testvalue")
}
}
// TestWalkWithExpiredNodes walks a trie whose root is an expired node backed
// by three archived records. It expects the walk to report three leaves and
// one resolved expired node, and the callback to see each value exactly once.
func TestWalkWithExpiredNodes(t *testing.T) {
records := []*archive.Record{
{Path: []byte{0x01, 0x02, 16}, Value: []byte("value1")},
{Path: []byte{0x04, 0x05, 16}, Value: []byte("value2")},
{Path: []byte{0x07, 0x08, 16}, Value: []byte("value3")},
}
offset, size, cleanup := setupTestArchive(t, records)
defer cleanup()
tr := NewEmpty(nil)
tr.root = &expiredNode{offset: offset, size: size}
var leaves []string
stats, err := tr.Walk(func(path []byte, value []byte) error {
leaves = append(leaves, string(value))
return nil
})
if err != nil {
t.Fatalf("Walk failed: %v", err)
}
if stats.Leaves != 3 {
t.Errorf("expected 3 leaves, got %d", stats.Leaves)
}
if stats.ExpiredResolved != 1 {
t.Errorf("expected 1 expired resolved, got %d", stats.ExpiredResolved)
}
// Verify all values were visited
// Entries are removed as they are seen, so a duplicate visit is reported
// as unexpected and anything left over is reported as missing.
expected := map[string]bool{"value1": true, "value2": true, "value3": true}
for _, leaf := range leaves {
if !expected[leaf] {
t.Errorf("unexpected leaf value: %q", leaf)
}
delete(expected, leaf)
}
if len(expected) > 0 {
t.Errorf("missing leaves: %v", expected)
}
}
// TestWalkEmptyTrie checks that walking an empty trie never invokes the
// callback and reports zero leaves and zero resolved expired nodes.
func TestWalkEmptyTrie(t *testing.T) {
tr := NewEmpty(nil)
stats, err := tr.Walk(func(path []byte, value []byte) error {
t.Error("callback should not be called for empty trie")
return nil
})
if err != nil {
t.Fatalf("Walk failed: %v", err)
}
if stats.Leaves != 0 || stats.ExpiredResolved != 0 {
t.Errorf("expected zero stats for empty trie, got leaves=%d expired=%d", stats.Leaves, stats.ExpiredResolved)
}
}
// TestWalkCallbackError checks that an error returned by the Walk callback is
// surfaced by Walk in a form that errors.Is recognises.
func TestWalkCallbackError(t *testing.T) {
records := []*archive.Record{
{Path: []byte{0x01, 0x02, 16}, Value: []byte("value1")},
}
offset, size, cleanup := setupTestArchive(t, records)
defer cleanup()
tr := NewEmpty(nil)
tr.root = &expiredNode{offset: offset, size: size}
testErr := errors.New("test error")
_, err := tr.Walk(func(path []byte, value []byte) error {
return testErr
})
if !errors.Is(err, testErr) {
t.Fatalf("expected test error, got %v", err)
}
}
// TestExpiredNodeResolvedSubtreeDirty verifies that when an expired node is
// resolved and a sibling leaf is modified, the commit captures ALL resolved
// nodes (not just the modified path). Without this fix, resolved-but-unmodified
// nodes would be lost: not in the diff layer (clean) and not in the raw DB
// (deleted by archiver).
//
// The check is a lower bound on the number of nodes in the committed NodeSet
// rather than an exact structural comparison.
func TestExpiredNodeResolvedSubtreeDirty(t *testing.T) {
// Use large values (>32 bytes) so leaf nodes are NOT embedded in
// their parent. This matches production storage tries where
// intermediate nodes are large enough to be stored independently.
bigVal1 := bytes.Repeat([]byte("A"), 40)
bigVal2 := bytes.Repeat([]byte("B"), 40)
// Create an archive with records under different branches.
records := []*archive.Record{
{Path: []byte{0x01, 0x02, 16}, Value: bigVal1},
{Path: []byte{0x04, 0x05, 16}, Value: bigVal2},
}
offset, size, cleanup := setupTestArchive(t, records)
defer cleanup()
tr := NewEmpty(nil)
tr.root = &expiredNode{offset: offset, size: size}
// Insert a value that goes through one branch of the resolved subtree.
// This modifies path [1, ...] but leaves path [4, ...] unmodified.
if err := tr.Update([]byte{0x12}, bytes.Repeat([]byte("C"), 40)); err != nil {
t.Fatalf("Update failed: %v", err)
}
// Commit the trie. The NodeSet should be non-nil because we modified data.
_, nodes := tr.Commit(false)
if nodes == nil {
t.Fatal("expected non-nil NodeSet after modifying expired subtree")
}
// The resolved-but-unmodified sibling (path [4, 5]) should also be
// captured in the NodeSet, because markSubtreeDirty ensures all resolved
// nodes are dirty. Count the nodes to verify.
nodeCount := len(nodes.Nodes)
// We expect at least 3 nodes: the root, the modified branch, and the
// sibling branch. The exact count depends on trie structure.
if nodeCount < 3 {
t.Errorf("expected at least 3 nodes in NodeSet (root + modified + sibling), got %d", nodeCount)
}
}
// TestMarkSubtreeDirty verifies that markSubtreeDirty correctly sets the dirty
// flag on all nodes in a subtree while preserving cached hashes.
//
// The fixture is a hand-built branch node with two leaf short nodes, all
// starting out clean with a placeholder hash.
func TestMarkSubtreeDirty(t *testing.T) {
// Build a small trie structure
leaf1 := &shortNode{Key: []byte{1, 16}, Val: valueNode("v1")}
leaf2 := &shortNode{Key: []byte{2, 16}, Val: valueNode("v2")}
branch := &fullNode{}
branch.Children[1] = leaf1
branch.Children[2] = leaf2
// Set hash but not dirty (as if loaded from DB)
branch.flags = nodeFlag{hash: hashNode("testhash"), dirty: false}
leaf1.flags = nodeFlag{hash: hashNode("hash1"), dirty: false}
leaf2.flags = nodeFlag{hash: hashNode("hash2"), dirty: false}
markSubtreeDirty(branch)
// All nodes should be dirty
if !branch.flags.dirty {
t.Error("branch should be dirty")
}
if !leaf1.flags.dirty {
t.Error("leaf1 should be dirty")
}
if !leaf2.flags.dirty {
t.Error("leaf2 should be dirty")
}
// Hashes should be preserved
if !bytes.Equal(branch.flags.hash, hashNode("testhash")) {
t.Error("branch hash should be preserved")
}
if !bytes.Equal(leaf1.flags.hash, hashNode("hash1")) {
t.Error("leaf1 hash should be preserved")
}
if !bytes.Equal(leaf2.flags.hash, hashNode("hash2")) {
t.Error("leaf2 hash should be preserved")
}
}
func TestExpiredNodeGetNode(t *testing.T) {
records := []*archive.Record{
{Path: []byte{0x01, 0x02, 16}, Value: []byte("testvalue")},
}
offset, size, cleanup := setupTestArchive(t, records)
defer cleanup()
tr := NewEmpty(nil)
tr.root = &expiredNode{offset: offset, size: size}
_, _, err := tr.GetNode(hexToCompact([]byte{0x01, 0x02}))
if err != nil && err.Error() != "non-consensus node" {
t.Fatalf("unexpected error: %v", err)
}
}

View file

@ -18,6 +18,7 @@ package trie
import (
"bytes"
"encoding/binary"
"fmt"
"sync"
@ -97,6 +98,22 @@ func (h *hasher) hash(n node, force bool) []byte {
// hash nodes don't have children, so they're left as were
return n
case *expiredNode:
// Return the original subtree hash that was cached when the
// expired node was decoded. The parent node references this
// hash, so we must return the same value to keep the Merkle
// root consistent.
if n.cachedHash != nil {
return n.cachedHash
}
// Fallback: hash the marker blob (should not happen in practice
// because decodeNodeUnsafe always provides the hash).
var buf [1 + 2*8]byte // 17 bytes
buf[0] = expiredNodeMarker
binary.BigEndian.PutUint64(buf[1:], n.offset)
binary.BigEndian.PutUint64(buf[9:], n.size)
return h.hashData(buf[:])
default:
panic(fmt.Errorf("unexpected node type, %T", n))
}
@ -214,6 +231,12 @@ func (h *hasher) proofHash(original node) []byte {
return bytes.Clone(h.encodeShortNode(n))
case *fullNode:
return bytes.Clone(h.encodeFullNode(n))
case *expiredNode:
var buf [1 + 2*8]byte
buf[0] = expiredNodeMarker
binary.BigEndian.PutUint64(buf[1:], n.offset)
binary.BigEndian.PutUint64(buf[9:], n.size)
return buf[:]
default:
panic(fmt.Errorf("unexpected node type, %T", original))
}

View file

@ -18,13 +18,16 @@ package trie
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"strings"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/trie/archive"
)
var indices = []string{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f", "[17]"}
@ -158,6 +161,15 @@ func decodeNodeUnsafe(hash, buf []byte) (node, error) {
if len(buf) == 0 {
return nil, io.ErrUnexpectedEOF
}
if buf[0] == expiredNodeMarker {
if len(buf) != 1+2*archive.OffsetSize {
return nil, fmt.Errorf("invalid expired node length: %d", len(buf))
}
offset := binary.BigEndian.Uint64(buf[1:])
size := binary.BigEndian.Uint64(buf[1+archive.OffsetSize:])
log.Debug("Decoded expired node", "offset", offset, "size", size, "hash", common.BytesToHash(hash))
return &expiredNode{offset: offset, size: size, cachedHash: hashNode(hash), archiveResolver: archive.ArchivedNodeResolver}, nil
}
elems, _, err := rlp.SplitList(buf)
if err != nil {
return nil, fmt.Errorf("decode error: %v", err)

View file

@ -25,6 +25,7 @@ import (
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/trie/archive"
)
// Prove constructs a merkle proof for key. The result contains all encoded nodes
@ -78,6 +79,16 @@ func (t *Trie) Prove(key []byte, proofDb ethdb.KeyValueWriter) error {
// clean cache or the database, they are all in their own
// copy and safe to use unsafe decoder.
tn = mustDecodeNodeUnsafe(n, blob)
case *expiredNode:
records, err := archive.ArchivedNodeResolver(n.offset, n.size)
if err != nil {
return fmt.Errorf("failed to resolve expired node in proof: %w", err)
}
resolved, err := archiveRecordsToNode(records)
if err != nil {
return fmt.Errorf("failed to rebuild expired node in proof: %w", err)
}
tn = resolved
default:
panic(fmt.Sprintf("%T: invalid node: %v", tn, tn))
}
@ -617,6 +628,8 @@ func get(tn node, key []byte, skipResolved bool) ([]byte, node) {
}
case hashNode:
return key, n
case *expiredNode:
return key, n
case nil:
return key, nil
case valueNode:

View file

@ -26,6 +26,7 @@ import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/trie/archive"
"github.com/ethereum/go-ethereum/trie/trienode"
"github.com/ethereum/go-ethereum/triedb/database"
"golang.org/x/sync/errgroup"
@ -57,6 +58,10 @@ type Trie struct {
// reader is the handler trie can retrieve nodes from.
reader *Reader
// archiveResolver is an optional callback to resolve expired nodes from
// an archive file.
archiveResolver archive.ResolverFn
// Various tracers for capturing the modifications to trie
opTracer *opTracer
prevalueTracer *PrevalueTracer
@ -70,17 +75,23 @@ func (t *Trie) newFlag() nodeFlag {
// Copy returns a copy of Trie. The root is duplicated through copyNode and
// both tracers are copied, while the reader and the archive resolver are
// assigned as-is and are therefore shared with the original trie.
func (t *Trie) Copy() *Trie {
return &Trie{
root: copyNode(t.root),
owner: t.owner,
committed: t.committed,
unhashed: t.unhashed,
uncommitted: t.uncommitted,
reader: t.reader,
opTracer: t.opTracer.copy(),
prevalueTracer: t.prevalueTracer.Copy(),
root: copyNode(t.root),
owner: t.owner,
committed: t.committed,
unhashed: t.unhashed,
uncommitted: t.uncommitted,
reader: t.reader,
archiveResolver: t.archiveResolver,
opTracer: t.opTracer.copy(),
prevalueTracer: t.prevalueTracer.Copy(),
}
}
// SetArchiveResolver sets the archive resolver callback for expired nodes.
// The callback is stored on the trie as given, replacing any resolver that
// was set before.
func (t *Trie) SetArchiveResolver(resolver archive.ResolverFn) {
t.archiveResolver = resolver
}
// New creates the trie instance with provided trie id and the read-only
// database. The state specified by trie id must be available, otherwise
// an error will be returned. The trie root specified by trie id can be
@ -218,6 +229,14 @@ func (t *Trie) get(origNode node, key []byte, pos int) (value []byte, newnode no
}
value, newnode, _, err := t.get(child, key, pos)
return value, newnode, true, err
case *expiredNode:
log.Debug("Resolving expired node in get()", "owner", t.owner, "offset", n.offset, "size", n.size, "pos", pos)
newnode, err := resolveExpiredNodeData(n)
if err != nil {
return nil, n, false, err
}
value, _, _, err = t.get(newnode, key, pos)
return value, newnode, true, err
default:
panic(fmt.Sprintf("%T: invalid node: %v", origNode, origNode))
}
@ -352,6 +371,14 @@ func (t *Trie) getNode(origNode node, path []byte, pos int) (item []byte, newnod
item, newnode, resolved, err := t.getNode(child, path, pos)
return item, newnode, resolved + 1, err
case *expiredNode:
rn, err := resolveExpiredNodeData(n)
if err != nil {
return nil, n, 0, err
}
item, newnode, resolvedCount, err := t.getNode(rn, path, pos)
return item, newnode, resolvedCount + 1, err
default:
panic(fmt.Sprintf("%T: invalid node: %v", origNode, origNode))
}
@ -475,6 +502,18 @@ func (t *Trie) insert(n node, prefix, key []byte, value node) (bool, node, error
}
return true, nn, nil
case *expiredNode:
log.Debug("Resolving expired node in insert()", "owner", t.owner, "offset", n.offset, "size", n.size)
rn, err := resolveExpiredNodeData(n)
if err != nil {
return false, nil, err
}
dirty, nn, err := t.insert(rn, prefix, key, value)
if !dirty || err != nil {
return false, rn, err
}
return true, nn, nil
default:
panic(fmt.Sprintf("%T: invalid node: %v", n, n))
}
@ -636,6 +675,18 @@ func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) {
}
return true, nn, nil
case *expiredNode:
log.Debug("Resolving expired node in delete()", "owner", t.owner, "offset", n.offset, "size", n.size)
rn, err := resolveExpiredNodeData(n)
if err != nil {
return false, nil, err
}
dirty, nn, err := t.delete(rn, prefix, key)
if !dirty || err != nil {
return false, rn, err
}
return true, nn, nil
default:
panic(fmt.Sprintf("%T: invalid node: %v (%v)", n, n, key))
}
@ -666,14 +717,24 @@ func copyNode(n node) node {
}
case hashNode:
return n
case *expiredNode:
return &expiredNode{
offset: n.offset,
size: n.size,
cachedHash: common.CopyBytes(n.cachedHash),
archiveResolver: n.archiveResolver,
}
default:
panic(fmt.Sprintf("%T: unknown node type", n))
}
}
// resolve expands n when it only refers to data stored elsewhere: a hashNode
// is loaded through resolveAndTrack (using prefix as its path) and an
// expiredNode is resolved through resolveExpiredNodeData. Any other node is
// returned unchanged.
func (t *Trie) resolve(n node, prefix []byte) (node, error) {
if n, ok := n.(hashNode); ok {
switch n := n.(type) {
case hashNode:
return t.resolveAndTrack(n, prefix)
case *expiredNode:
return resolveExpiredNodeData(n)
}
return n, nil
}
@ -784,6 +845,58 @@ func (t *Trie) Witness() map[string][]byte {
return t.prevalueTracer.Values()
}
// WalkStats aggregates the counters collected during a Walk traversal.
type WalkStats struct {
	// Leaves is the number of leaf nodes visited.
	Leaves int

	// ExpiredResolved is the number of expired nodes resolved from archive.
	ExpiredResolved int
}
// Walk traverses the whole trie starting at the root, resolving hashNodes and
// expiredNodes on the way, and invokes fn for each leaf it reaches. The
// returned WalkStats counts the leaves visited and the expired nodes
// resolved. The traversal stops at the first error, which is returned.
func (t *Trie) Walk(fn func(path []byte, value []byte) error) (WalkStats, error) {
	stats, err := t.walk(t.root, nil, fn)
	return stats, err
}
// walk is the recursive worker behind Walk. It descends from n, whose
// position in the trie is given by path (hex nibbles), and calls fn with the
// full path and value of every valueNode it reaches.
//
// hashNodes are loaded through resolveAndTrack and expiredNodes through
// resolveExpiredNodeData before the descent continues. The returned stats
// cover the subtree rooted at n; the first error from resolution or from fn
// aborts the traversal and is returned.
func (t *Trie) walk(n node, path []byte, fn func([]byte, []byte) error) (WalkStats, error) {
	switch n := n.(type) {
	case *shortNode:
		// The path is copied before being extended so that sibling
		// branches never share a backing array.
		return t.walk(n.Val, append(append([]byte{}, path...), n.Key...), fn)

	case *fullNode:
		var stats WalkStats
		// Visit all 17 slots. The last one holds a value terminating at
		// this branch; skipping it would silently drop that leaf. Its
		// index (16) doubles as the terminator nibble, matching the paths
		// reported for leaves stored in short nodes.
		for i := range n.Children {
			child := n.Children[i]
			if child == nil {
				continue
			}
			childStats, err := t.walk(child, append(append([]byte{}, path...), byte(i)), fn)
			if err != nil {
				return stats, err
			}
			stats.Leaves += childStats.Leaves
			stats.ExpiredResolved += childStats.ExpiredResolved
		}
		return stats, nil

	case hashNode:
		resolved, err := t.resolveAndTrack(n, path)
		if err != nil {
			return WalkStats{}, err
		}
		return t.walk(resolved, path, fn)

	case *expiredNode:
		resolved, err := resolveExpiredNodeData(n)
		if err != nil {
			return WalkStats{}, err
		}
		childStats, err := t.walk(resolved, path, fn)
		// The node itself was resolved even if the walk below it failed.
		childStats.ExpiredResolved++
		return childStats, err

	case valueNode:
		return WalkStats{Leaves: 1}, fn(path, []byte(n))

	case nil:
		return WalkStats{}, nil
	}
	// Unknown node types contribute nothing to the traversal.
	return WalkStats{}, nil
}
// reset drops the referenced root node and cleans all internal state.
func (t *Trie) reset() {
t.root = nil

View file

@ -399,6 +399,28 @@ func (db *Database) Disk() ethdb.Database {
return db.disk
}
// DiffHead reports the root hash of the topmost diff layer by delegating to
// the pathdb backend's DiffHead and passing its result through unchanged.
// If the backend is not pathdb, it returns the zero hash and false.
func (db *Database) DiffHead() (common.Hash, bool) {
	if pdb, ok := db.backend.(*pathdb.Database); ok {
		return pdb.DiffHead()
	}
	return common.Hash{}, false
}
// DisableStateHistory asks the pathdb backend to close and disable its state
// history freezer. The archiver uses it to bypass state history writes while
// flushing diff layers when the state history may have gaps. It is a no-op
// for any backend other than pathdb.
func (db *Database) DisableStateHistory() {
	if pdb, ok := db.backend.(*pathdb.Database); ok {
		pdb.DisableStateHistory()
	}
}
// SnapshotCompleted returns the indicator if the snapshot is completed.
func (db *Database) SnapshotCompleted() bool {
pdb, ok := db.backend.(*pathdb.Database)

View file

@ -318,6 +318,30 @@ func (db *Database) Update(root common.Hash, parentRoot common.Hash, block uint6
return db.tree.cap(root, maxDiffLayers)
}
// DiffHead reports the root hash of the topmost diff layer together with
// true. When the layer tree holds no diff layer (only the disk layer), the
// disk layer root and false are returned instead. The database read lock is
// held for the duration of the lookup.
func (db *Database) DiffHead() (common.Hash, bool) {
	db.lock.RLock()
	defer db.lock.RUnlock()

	head, isDiff := db.tree.diffHead()
	return head, isDiff
}
// DisableStateHistory closes and disables the state history freezer. This is
// used by the archiver to bypass state history writes during diff layer
// flushing, since the archiver only needs trie nodes committed to disk and
// state history may have gaps from unclean shutdowns that prevent sequential
// appends.
//
// The call is a no-op when no freezer is attached, so it is safe to invoke
// more than once. A failure to close the freezer is logged rather than
// silently dropped; the freezer is detached either way so that no further
// state history is written through it.
func (db *Database) DisableStateHistory() {
	db.lock.Lock()
	defer db.lock.Unlock()

	if db.stateFreezer == nil {
		return
	}
	if err := db.stateFreezer.Close(); err != nil {
		log.Warn("Failed to close state history freezer", "err", err)
	}
	db.stateFreezer = nil
	log.Info("Disabled state history freezer")
}
// Commit traverses downwards the layer tree from a specified layer with the
// provided state root and all the layers below are flattened downwards. It
// can be used alone and mostly for test purposes.

View file

@ -278,9 +278,17 @@ func truncateFromHead(store ethdb.AncientStore, typ historyType, nhead uint64) (
return 0, err
}
// Ensure that the truncation target falls within the valid range.
if ohead < nhead || nhead < otail {
if nhead < otail {
return 0, fmt.Errorf("%w, %s, tail: %d, head: %d, target: %d", errHeadTruncationOutOfRange, typ, otail, ohead, nhead)
}
// If the target is ahead of the current head, there's nothing to truncate.
// This can happen after unclean shutdowns where the state history was not
// fully written.
if ohead < nhead {
log.Warn("State history shorter than target, nothing to truncate",
"type", typ.String(), "head", ohead, "target", nhead)
return 0, nil
}
// Short circuit if nothing to truncate.
if ohead == nhead {
return 0, nil

View file

@ -244,8 +244,8 @@ func TestTruncateOutOfRange(t *testing.T) {
target uint64
expErr error
}{
{0, head, nil}, // nothing to delete
{0, head + 1, errHeadTruncationOutOfRange},
{0, head, nil}, // nothing to delete
{0, head + 1, nil}, // gracefully handled after unclean shutdown
{0, tail - 1, errHeadTruncationOutOfRange},
{1, tail, nil}, // nothing to delete
{1, head + 1, errTailTruncationOutOfRange},

View file

@ -31,6 +31,7 @@ import (
// of the referenced layer by themselves.
type layerTree struct {
base *diskLayer
head common.Hash // Root hash of the topmost layer (diff or disk)
layers map[common.Hash]layer
// descendants is a two-dimensional map where the keys represent
@ -59,6 +60,7 @@ func (tree *layerTree) init(head layer) {
defer tree.lock.Unlock()
current := head
tree.head = head.rootHash()
tree.layers = make(map[common.Hash]layer)
tree.descendants = make(map[common.Hash]map[common.Hash]struct{})
@ -76,6 +78,18 @@ func (tree *layerTree) init(head layer) {
tree.lookup = newLookup(head, tree.isDescendant)
}
// diffHead looks up the layer registered under the recorded head hash. If it
// is a diff layer, the head hash and true are returned; otherwise the disk
// layer root and false are returned. The tree read lock is held throughout.
func (tree *layerTree) diffHead() (common.Hash, bool) {
	tree.lock.RLock()
	defer tree.lock.RUnlock()

	switch tree.layers[tree.head].(type) {
	case *diffLayer:
		return tree.head, true
	default:
		return tree.base.rootHash(), false
	}
}
// get retrieves a layer belonging to the given state root.
func (tree *layerTree) get(root common.Hash) layer {
tree.lock.RLock()

View file

@ -69,7 +69,7 @@ func (r *reader) Node(owner common.Hash, path []byte, hash common.Hash) ([]byte,
return nil, err
}
// Error out if the local one is inconsistent with the target.
if !r.noHashCheck && got != hash {
if !r.noHashCheck && (len(blob) > 0 && blob[0] != 0) && got != hash {
// Location is always available even if the node
// is not found.
switch loc.loc {