diff --git a/cmd/geth/archivecmd.go b/cmd/geth/archivecmd.go
new file mode 100644
index 0000000000..d6e241974e
--- /dev/null
+++ b/cmd/geth/archivecmd.go
@@ -0,0 +1,576 @@
+// Copyright 2026 The go-ethereum Authors
+// This file is part of go-ethereum.
+//
+// go-ethereum is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// go-ethereum is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with go-ethereum. If not, see .
+
+package main
+
+import (
+ "encoding/binary"
+ "errors"
+ "fmt"
+ "os"
+ "path/filepath"
+ "slices"
+ "time"
+
+ "github.com/ethereum/go-ethereum/cmd/utils"
+ "github.com/ethereum/go-ethereum/common"
+ "github.com/ethereum/go-ethereum/core/rawdb"
+ "github.com/ethereum/go-ethereum/core/types"
+ "github.com/ethereum/go-ethereum/crypto"
+ "github.com/ethereum/go-ethereum/ethdb"
+ "github.com/ethereum/go-ethereum/log"
+ "github.com/ethereum/go-ethereum/rlp"
+ "github.com/ethereum/go-ethereum/trie"
+ "github.com/ethereum/go-ethereum/trie/archive"
+ "github.com/ethereum/go-ethereum/triedb/database"
+ "github.com/urfave/cli/v2"
+)
+
+var (
+ // Flags for the archive command
+ archiveOutputFlag = &cli.StringFlag{
+ Name: "output",
+ Usage: "Path to archive output file",
+ Value: "", // Default: /nodearchive
+ }
+ archiveCompactionIntervalFlag = &cli.Uint64Flag{
+ Name: "compaction-interval",
+ Usage: "Run compaction after this many subtrees (0 = disable)",
+ Value: 1000,
+ }
+ archiveDryRunFlag = &cli.BoolFlag{
+ Name: "dry-run",
+ Usage: "Simulate without modifying database",
+ }
+
+ // Commands
+ archiveCheckNodeFlag = &cli.StringFlag{
+ Name: "owner",
+ Usage: "Owner hash (hex) for the trie node to check",
+ }
+ archiveCheckPathFlag = &cli.StringFlag{
+ Name: "path",
+ Usage: "Path (hex nibbles) of the trie node to check",
+ }
+
+ archiveCommand = &cli.Command{
+ Name: "archive",
+ Usage: "Archive state trie nodes to reduce database size",
+ Subcommands: []*cli.Command{
+ archiveGenerateCmd,
+ archiveVerifyCmd,
+ archiveDeleteJournalCmd,
+ archiveCheckNodeCmd,
+ },
+ }
+
+ archiveCheckNodeCmd = &cli.Command{
+ Name: "check-node",
+ Usage: "Check if a specific trie node exists in the raw DB",
+ Action: archiveCheckNode,
+ Flags: slices.Concat([]cli.Flag{
+ archiveCheckNodeFlag,
+ archiveCheckPathFlag,
+ }, utils.NetworkFlags, utils.DatabaseFlags),
+ }
+
+ archiveDeleteJournalCmd = &cli.Command{
+ Name: "delete-journal",
+ Usage: "Delete the pathdb journal to force a clean restart",
+ Action: archiveDeleteJournal,
+ Flags: slices.Concat(utils.NetworkFlags, utils.DatabaseFlags),
+ Description: `
+Deletes the pathdb journal (TrieJournal key and merkle.journal file) from the
+database. This forces geth to restart with a bare disk layer, discarding any
+in-memory diff layers that may be inconsistent with archived state.
+
+Use this after running 'archive generate' if geth was started in between and
+recreated the journal.
+
+Examples:
+ geth archive delete-journal --datadir /path/to/datadir
+ geth archive delete-journal --hoodi
+`,
+ }
+
+ archiveVerifyCmd = &cli.Command{
+ Name: "verify",
+ Usage: "Verify all archived nodes can be correctly resurrected",
+ Action: archiveVerify,
+ Flags: slices.Concat(utils.NetworkFlags, utils.DatabaseFlags),
+ Description: `
+Walks the entire state trie, resolving every expired node from the archive
+file and verifying that the reconstructed subtree hash matches the original.
+Also walks all storage tries referenced by accounts.
+
+The database is opened read-only. No modifications are made.
+
+Examples:
+ geth archive verify --datadir /path/to/datadir
+ geth archive verify --hoodi
+`,
+ }
+
+ archiveGenerateCmd = &cli.Command{
+ Name: "generate",
+ Usage: "Generate archive files from height-3 subtrees",
+ ArgsUsage: "[state-root]",
+ Action: archiveGenerate,
+ Flags: slices.Concat([]cli.Flag{
+ archiveOutputFlag,
+ archiveCompactionIntervalFlag,
+ archiveDryRunFlag,
+ }, utils.NetworkFlags, utils.DatabaseFlags),
+ Description: `
+Walks the state trie of the specified root (or head block) and archives
+subtrees at height 3. Each archived subtree is replaced with an expiredNode
+that references the archive file offset and size.
+
+Height is measured from leaves: leaves=0, parents=1, etc. A height-3 node
+has leaves at most 3 levels below it.
+
+The archiver reads trie nodes directly from the persistent database layer,
+bypassing any in-memory diff layers. This ensures consistency between the
+data it reads and the data it modifies.
+
+Examples:
+ # Archive from the persistent disk state
+ geth archive generate --datadir /path/to/datadir
+
+ # Dry run to see what would be archived
+ geth archive generate --dry-run --datadir /path/to/datadir
+
+ # Custom output and compaction interval
+ geth archive generate --output /path/to/archive --compaction-interval 500
+`,
+ }
+)
+
+// rawDBNodeReader implements database.NodeReader by reading trie nodes directly
+// from the raw key-value database, bypassing pathdb's in-memory diff layers.
+// This ensures the archiver sees the same trie state it modifies.
+type rawDBNodeReader struct {
+ db ethdb.KeyValueReader
+}
+
+func (r *rawDBNodeReader) Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, error) {
+ var blob []byte
+ if owner == (common.Hash{}) {
+ blob = rawdb.ReadAccountTrieNode(r.db, path)
+ } else {
+ blob = rawdb.ReadStorageTrieNode(r.db, owner, path)
+ }
+ // Skip hash verification: the raw DB may contain expiredNode markers
+ // (blob[0] == 0x00) which have different hashes than the original nodes.
+ return blob, nil
+}
+
+// rawDBNodeDatabase implements database.NodeDatabase using direct raw DB reads.
+type rawDBNodeDatabase struct {
+ db ethdb.KeyValueReader
+ root common.Hash
+}
+
+func (d *rawDBNodeDatabase) NodeReader(stateRoot common.Hash) (database.NodeReader, error) {
+ // Only allow reading the persistent disk root state
+ if stateRoot != d.root {
+ return nil, fmt.Errorf("raw DB reader only supports disk root %x, got %x", d.root, stateRoot)
+ }
+ return &rawDBNodeReader{db: d.db}, nil
+}
+
+func archiveGenerate(ctx *cli.Context) error {
+ // 1. Setup node and databases
+ stack, _ := makeConfigNode(ctx)
+ defer stack.Close()
+
+ dryRun := ctx.Bool(archiveDryRunFlag.Name)
+ chaindb := utils.MakeChainDatabase(ctx, stack, dryRun)
+ defer chaindb.Close()
+
+ // Check state scheme - we only support PathDB
+ scheme := cycleCheckScheme(ctx, chaindb)
+ if scheme != rawdb.PathScheme {
+ return fmt.Errorf("archive generation requires path-based state scheme, got: %s", scheme)
+ }
+
+ // 2. Flush diff layers to disk via pathdb. This ensures the raw DB
+ // contains the complete, up-to-date state trie and that state history
+ // entries are properly written to the freezer.
+ trieDB := utils.MakeTrieDatabase(ctx, stack, chaindb, false, dryRun, false)
+ head, hasDiff := trieDB.DiffHead()
+ if hasDiff {
+ log.Info("Flushing diff layers to disk", "head", head)
+ if err := trieDB.Commit(head, true); err != nil {
+ trieDB.Close()
+ return fmt.Errorf("failed to flush diff layers: %w", err)
+ }
+ log.Info("Diff layers flushed successfully")
+ } else {
+ log.Info("No diff layers to flush, disk state is current", "root", head)
+ }
+ // Close triedb — we work directly with raw DB for archival.
+ // We'll re-open it at the end to write a fresh journal.
+ trieDB.Close()
+
+ // 3. Determine the disk state root (now up-to-date after flush).
+ rootBlob := rawdb.ReadAccountTrieNode(chaindb, nil)
+ if len(rootBlob) == 0 {
+ return errors.New("state trie not found in database")
+ }
+ root := crypto.Keccak256Hash(rootBlob)
+ log.Info("Using disk state root", "root", root)
+
+ // Create a raw DB node reader that bypasses pathdb layers
+ nodeDB := &rawDBNodeDatabase{db: chaindb, root: root}
+
+ // 4. Open archive writer (unless dry-run).
+ // The archive file is placed at /geth/nodearchive by default,
+ // matching the path used by ArchivedNodeResolver when reading back.
+ var writer *archive.ArchiveWriter
+ archivePath := ctx.String(archiveOutputFlag.Name)
+ if archivePath == "" {
+ archivePath = filepath.Join(stack.ResolvePath(""), "nodearchive")
+ }
+
+ if !dryRun {
+ var err error
+ writer, err = archive.NewArchiveWriter(archivePath)
+ if err != nil {
+ return fmt.Errorf("failed to open archive file %s: %w", archivePath, err)
+ }
+ defer writer.Close()
+ log.Info("Opened archive file", "path", archivePath)
+ } else {
+ log.Info("Dry run mode - no changes will be made")
+ }
+
+ // 5. Create and run archiver
+ archiver := trie.NewArchiver(
+ chaindb,
+ nodeDB,
+ writer,
+ ctx.Uint64(archiveCompactionIntervalFlag.Name),
+ dryRun,
+ )
+
+ start := time.Now()
+ if err := archiver.ProcessState(root); err != nil {
+ return fmt.Errorf("archive generation failed: %w", err)
+ }
+
+ // 6. Get stats and optionally run final compaction
+ subtrees, leaves, bytesDeleted := archiver.Stats()
+
+ if !dryRun && subtrees > 0 {
+ log.Info("Running final database compaction")
+ if err := chaindb.Compact(nil, nil); err != nil {
+ log.Warn("Final compaction failed", "err", err)
+ }
+ }
+
+ // 7. Re-journal the pathdb state with the current disk root.
+ // After archiving, some trie nodes have been replaced with expired
+ // markers. We re-open pathdb and write a fresh journal (disk layer
+ // only, since all diff layers were flushed in step 2) so that geth
+ // can restart cleanly.
+ if !dryRun {
+ log.Info("Re-journaling pathdb state")
+ freshTrieDB := utils.MakeTrieDatabase(ctx, stack, chaindb, false, false, false)
+ freshRoot := crypto.Keccak256Hash(rawdb.ReadAccountTrieNode(chaindb, nil))
+ if err := freshTrieDB.Journal(freshRoot); err != nil {
+ log.Warn("Failed to re-journal pathdb state", "err", err)
+ }
+ freshTrieDB.Close()
+ }
+
+ // 8. Print summary
+ var archiveSize uint64
+ if writer != nil {
+ archiveSize = writer.Offset()
+ }
+
+ log.Info("Archive generation complete",
+ "subtrees", subtrees,
+ "leaves", leaves,
+ "bytesDeleted", bytesDeleted,
+ "archiveSize", archiveSize,
+ "elapsed", common.PrettyDuration(time.Since(start)))
+
+ if dryRun {
+ log.Info("This was a dry run - no changes were made to the database")
+ }
+
+ return nil
+}
+
+func archiveVerify(ctx *cli.Context) error {
+ stack, _ := makeConfigNode(ctx)
+ defer stack.Close()
+
+ // Open database read-only
+ chaindb := utils.MakeChainDatabase(ctx, stack, true)
+ defer chaindb.Close()
+
+ scheme := cycleCheckScheme(ctx, chaindb)
+ if scheme != rawdb.PathScheme {
+ return fmt.Errorf("archive verify requires path-based state scheme, got: %s", scheme)
+ }
+
+ // Set archive data dir so ArchivedNodeResolver can find the file
+ // ResolvePath("") returns the node's data directory (e.g. .ethereum/hoodi/geth),
+ // but ArchivedNodeResolver expects the instance directory (.ethereum/hoodi)
+ // since it appends "geth/nodearchive" itself.
+ archive.ArchiveDataDir = filepath.Dir(stack.ResolvePath(""))
+
+ // Compute disk root
+ rootBlob := rawdb.ReadAccountTrieNode(chaindb, nil)
+ if len(rootBlob) == 0 {
+ return errors.New("state trie not found in database")
+ }
+ root := crypto.Keccak256Hash(rootBlob)
+ log.Info("Verifying archived nodes", "root", root)
+
+ nodeDB := &rawDBNodeDatabase{db: chaindb, root: root}
+
+ // Open account trie
+ accountTrie, err := trie.New(trie.StateTrieID(root), nodeDB)
+ if err != nil {
+ return fmt.Errorf("failed to open account trie: %w", err)
+ }
+
+ var (
+ totalAccounts int
+ totalStorageTries int
+ totalLeaves int
+ totalExpired int
+ totalErrors int
+ start = time.Now()
+ lastLog = time.Now()
+ )
+
+ // Walk the account trie — this resolves all expired nodes and verifies hashes
+ accountStats, err := accountTrie.Walk(func(path []byte, value []byte) error {
+ totalAccounts++
+ if time.Since(lastLog) > 30*time.Second {
+ log.Info("Verification progress",
+ "accounts", totalAccounts,
+ "storageTries", totalStorageTries,
+ "leaves", totalLeaves,
+ "expired", totalExpired,
+ "errors", totalErrors)
+ lastLog = time.Now()
+ }
+
+ // Decode account to check for storage trie
+ var acc types.StateAccount
+ if err := rlp.DecodeBytes(value, &acc); err != nil {
+ log.Warn("Failed to decode account", "err", err)
+ totalErrors++
+ return nil // continue walking
+ }
+ if acc.Root == types.EmptyRootHash {
+ return nil
+ }
+
+ // Open and walk storage trie.
+ // path is hex-nibble encoded (with a 16 terminator from the trie key),
+ // so convert nibble pairs back to the 32-byte account hash.
+ nibbles := path
+ if len(nibbles) > 0 && nibbles[len(nibbles)-1] == 16 {
+ nibbles = nibbles[:len(nibbles)-1]
+ }
+ keyBytes := make([]byte, len(nibbles)/2)
+ for i := 0; i < len(nibbles); i += 2 {
+ keyBytes[i/2] = nibbles[i]<<4 | nibbles[i+1]
+ }
+ accountHash := common.BytesToHash(keyBytes)
+ storageID := trie.StorageTrieID(root, accountHash, acc.Root)
+ storageTrie, err := trie.New(storageID, nodeDB)
+ if err != nil {
+ log.Warn("Failed to open storage trie", "account", accountHash, "err", err)
+ totalErrors++
+ return nil
+ }
+
+ storageStats, err := storageTrie.Walk(func(spath []byte, svalue []byte) error {
+ return nil
+ })
+ if err != nil {
+ log.Warn("Storage trie walk failed", "account", accountHash, "err", err)
+ totalErrors++
+ return nil
+ }
+ totalStorageTries++
+ totalLeaves += storageStats.Leaves
+ totalExpired += storageStats.ExpiredResolved
+ return nil
+ })
+ if err != nil {
+ return fmt.Errorf("account trie walk failed: %w", err)
+ }
+
+ totalLeaves += accountStats.Leaves
+ totalExpired += accountStats.ExpiredResolved
+
+ log.Info("Archive verification complete",
+ "accounts", totalAccounts,
+ "storageTries", totalStorageTries,
+ "totalLeaves", totalLeaves,
+ "expiredResolved", totalExpired,
+ "errors", totalErrors,
+ "elapsed", common.PrettyDuration(time.Since(start)))
+
+ if totalErrors > 0 {
+ return fmt.Errorf("verification completed with %d errors", totalErrors)
+ }
+ return nil
+}
+
+func archiveDeleteJournal(ctx *cli.Context) error {
+ stack, _ := makeConfigNode(ctx)
+ defer stack.Close()
+
+ chaindb := utils.MakeChainDatabase(ctx, stack, false)
+ defer chaindb.Close()
+
+ // Delete the pathdb journal KV key
+ if err := chaindb.Delete([]byte("TrieJournal")); err != nil {
+ log.Warn("Failed to delete pathdb journal key", "err", err)
+ } else {
+ log.Info("Deleted pathdb journal key (TrieJournal)")
+ }
+
+ // Delete the journal file(s) - check both legacy and current locations
+ for _, dir := range []string{"triedb", ""} {
+ for _, name := range []string{"merkle.journal", "verkle.journal"} {
+ journalFile := filepath.Join(stack.ResolvePath(dir), name)
+ if err := os.Remove(journalFile); err == nil {
+ log.Info("Deleted journal file", "path", journalFile)
+ } else if !os.IsNotExist(err) {
+ log.Warn("Failed to delete journal file", "path", journalFile, "err", err)
+ }
+ }
+ }
+
+ return nil
+}
+
+func archiveCheckNode(ctx *cli.Context) error {
+ stack, _ := makeConfigNode(ctx)
+ defer stack.Close()
+
+ chaindb := utils.MakeChainDatabase(ctx, stack, true)
+ defer chaindb.Close()
+
+ ownerHex := ctx.String(archiveCheckNodeFlag.Name)
+ pathHex := ctx.String(archiveCheckPathFlag.Name)
+
+ if ownerHex == "" {
+ return errors.New("--owner flag is required")
+ }
+
+ owner := common.HexToHash(ownerHex)
+
+ // Parse path: hex nibbles like "08" → []byte{0, 8}
+ var path []byte
+ for _, c := range pathHex {
+ var nibble byte
+ switch {
+ case c >= '0' && c <= '9':
+ nibble = byte(c - '0')
+ case c >= 'a' && c <= 'f':
+ nibble = byte(c-'a') + 10
+ case c >= 'A' && c <= 'F':
+ nibble = byte(c-'A') + 10
+ default:
+ return fmt.Errorf("invalid hex char in path: %c", c)
+ }
+ path = append(path, nibble)
+ }
+
+ log.Info("Checking node in raw DB", "owner", owner, "path", fmt.Sprintf("%x", path))
+
+ // Read the node directly from the raw DB
+ isAccount := owner == (common.Hash{})
+
+ // Check the target path and all prefixes up to root
+ for i := len(path); i >= 0; i-- {
+ subpath := path[:i]
+ var blob []byte
+ if isAccount {
+ blob = rawdb.ReadAccountTrieNode(chaindb, subpath)
+ } else {
+ blob = rawdb.ReadStorageTrieNode(chaindb, owner, subpath)
+ }
+
+ status := "MISSING"
+ details := ""
+ if len(blob) > 0 {
+ if blob[0] == 0x00 {
+ status = "EXPIRED"
+ if len(blob) == 17 {
+ offset := binary.BigEndian.Uint64(blob[1:9])
+ size := binary.BigEndian.Uint64(blob[9:17])
+ details = fmt.Sprintf("offset=%d size=%d", offset, size)
+ }
+ } else {
+ status = fmt.Sprintf("PRESENT (%d bytes, first=0x%02x)", len(blob), blob[0])
+ }
+ }
+ label := "prefix"
+ if i == len(path) {
+ label = "TARGET"
+ }
+ if i == 0 {
+ label = "ROOT"
+ }
+ log.Info("Node check",
+ "label", label,
+ "path", fmt.Sprintf("%x", subpath),
+ "pathLen", i,
+ "status", status,
+ "details", details)
+ }
+
+ // Also check a few child paths to see what's below the target
+ for nibble := byte(0); nibble < 16; nibble++ {
+ childPath := append(append([]byte{}, path...), nibble)
+ var blob []byte
+ if isAccount {
+ blob = rawdb.ReadAccountTrieNode(chaindb, childPath)
+ } else {
+ blob = rawdb.ReadStorageTrieNode(chaindb, owner, childPath)
+ }
+ if len(blob) > 0 {
+ status := fmt.Sprintf("PRESENT (%d bytes, first=0x%02x)", len(blob), blob[0])
+ if blob[0] == 0x00 && len(blob) == 17 {
+ offset := binary.BigEndian.Uint64(blob[1:9])
+ size := binary.BigEndian.Uint64(blob[9:17])
+ status = fmt.Sprintf("EXPIRED offset=%d size=%d", offset, size)
+ }
+ log.Info("Child node", "path", fmt.Sprintf("%x", childPath), "status", status)
+ }
+ }
+
+ return nil
+}
+
+// cycleCheckScheme returns the state scheme for the database.
+// It's a helper to check what scheme is in use.
+func cycleCheckScheme(ctx *cli.Context, db ethdb.Database) string {
+ return rawdb.ReadStateScheme(db)
+}
diff --git a/cmd/geth/main.go b/cmd/geth/main.go
index 5e90164aaa..528bd8400d 100644
--- a/cmd/geth/main.go
+++ b/cmd/geth/main.go
@@ -240,6 +240,8 @@ func init() {
dumpConfigCommand,
// see dbcmd.go
dbCommand,
+ // See archivecmd.go
+ archiveCommand,
// See cmd/utils/flags_legacy.go
utils.ShowDeprecated,
// See snapshot.go
diff --git a/node/node.go b/node/node.go
index 7c0d69775c..d9c38c4b6c 100644
--- a/node/node.go
+++ b/node/node.go
@@ -38,6 +38,7 @@ import (
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/p2p"
"github.com/ethereum/go-ethereum/rpc"
+ "github.com/ethereum/go-ethereum/trie/archive"
"github.com/gofrs/flock"
)
@@ -85,6 +86,7 @@ func New(conf *Config) (*Node, error) {
return nil, err
}
conf.DataDir = absdatadir
+ archive.ArchiveDataDir = absdatadir
}
if conf.Logger == nil {
conf.Logger = log.New()
diff --git a/trie/archive/archive.go b/trie/archive/archive.go
new file mode 100644
index 0000000000..d4f4fb2382
--- /dev/null
+++ b/trie/archive/archive.go
@@ -0,0 +1,95 @@
+// Copyright 2026 go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see .
+
+package archive
+
+import (
+ "bytes"
+ "errors"
+ "fmt"
+ "io"
+ "os"
+ "path/filepath"
+
+ "github.com/ethereum/go-ethereum/rlp"
+)
+
+// ResolverFn is a callback to resolve expired nodes from an archive file.
+// Given an offset and size, it returns the serialized node data from the archive.
+type ResolverFn func(offset, size uint64) ([]*Record, error)
+
+// OffsetSize is the size of the file offset in bytes.
+const OffsetSize = 8
+
+var (
+ EmptyArchiveRecord = errors.New("empty record") // The archive contained a size-zero record.
+ ErrNoResolver = errors.New("no archive resolver set for expired node") // An expired node is accessed without a resolver.
+)
+
+// Record contains an archive file record. It is not the most optimal
+// structure, since any modification to it will need to be overwritten.
+type Record struct {
+ Path []byte
+ Value []byte
+}
+
+// ArchiveDataDir is the data directory where the archive file is stored.
+var ArchiveDataDir string
+
+// ArchivedNodeResolver takes a buffer containing the archive data
+// held by an expiring node (an offset and a size) and returns a
+// list of records, which is a list of serialized leaf nodes. The
+// caller knows the context (MPT, binary trie) and is responsible
+// for decoding the nodes.
+func ArchivedNodeResolver(offset, size uint64) ([]*Record, error) {
+ file, err := os.Open(filepath.Join(ArchiveDataDir, "geth", "nodearchive"))
+ if err != nil {
+ return nil, fmt.Errorf("error opening archive file: %w", err)
+ }
+ defer file.Close()
+
+ o, err := file.Seek(int64(offset), io.SeekStart)
+ if err != nil {
+ return nil, fmt.Errorf("error seeking into archive file: %w", err)
+ }
+ if uint64(o) != offset {
+ return nil, fmt.Errorf("invalid offset: want %d, got %d", offset, o)
+ }
+
+ data := make([]byte, size)
+ if _, err := io.ReadFull(file, data); err != nil {
+ return nil, fmt.Errorf("error reading data from archive: %w", err)
+ }
+
+ var records []*Record
+ stream := rlp.NewStream(bytes.NewReader(data), uint64(len(data)))
+ for len(data) > 0 {
+ _, size, err := stream.Kind()
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ return nil, fmt.Errorf("error getting rlp kind from archive data: %w", err)
+ }
+ var record Record
+ err = stream.Decode(&record)
+ if err != nil {
+ return nil, fmt.Errorf("error decoding rlp record from archive data (offset=%d, size=%d): %w", offset, size, err)
+ }
+ records = append(records, &record)
+ }
+ return records, nil
+}
diff --git a/trie/archive/writer.go b/trie/archive/writer.go
new file mode 100644
index 0000000000..98b4ecce4b
--- /dev/null
+++ b/trie/archive/writer.go
@@ -0,0 +1,92 @@
+// Copyright 2026 go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see .
+
+package archive
+
+import (
+ "os"
+ "sync"
+
+ "github.com/ethereum/go-ethereum/rlp"
+)
+
+// ArchiveWriter is an append-only writer for archive files.
+// It writes RLP-encoded records to a file and tracks the current offset.
+type ArchiveWriter struct {
+ file *os.File
+ offset uint64
+ mu sync.Mutex
+}
+
+// NewArchiveWriter creates a new archive writer that appends to the given file.
+// If the file exists, it will be opened in append mode and writing continues
+// from the current end of file. If it doesn't exist, it will be created.
+func NewArchiveWriter(path string) (*ArchiveWriter, error) {
+ file, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
+ if err != nil {
+ return nil, err
+ }
+ info, err := file.Stat()
+ if err != nil {
+ file.Close()
+ return nil, err
+ }
+ return &ArchiveWriter{
+ file: file,
+ offset: uint64(info.Size()),
+ }, nil
+}
+
+// WriteSubtree writes all records belonging to a subtree and returns
+// the starting offset and total size of the written data.
+// This is the atomic unit of archival - all records for a subtree are
+// written together and can be retrieved together using the returned
+// offset and size.
+func (w *ArchiveWriter) WriteSubtree(records []*Record) (offset uint64, size uint64, err error) {
+ w.mu.Lock()
+ defer w.mu.Unlock()
+
+ startOffset := w.offset
+ for _, rec := range records {
+ encoded, err := rlp.EncodeToBytes(rec)
+ if err != nil {
+ return 0, 0, err
+ }
+ if _, err := w.file.Write(encoded); err != nil {
+ return 0, 0, err
+ }
+ w.offset += uint64(len(encoded))
+ }
+ return startOffset, w.offset - startOffset, nil
+}
+
+// Sync flushes the file to disk. This should be called after writing
+// a subtree and before modifying the database to ensure crash consistency.
+func (w *ArchiveWriter) Sync() error {
+ return w.file.Sync()
+}
+
+// Close closes the archive file.
+func (w *ArchiveWriter) Close() error {
+ return w.file.Close()
+}
+
+// Offset returns the current write offset in the file.
+func (w *ArchiveWriter) Offset() uint64 {
+ w.mu.Lock()
+ defer w.mu.Unlock()
+ return w.offset
+}
diff --git a/trie/archiver.go b/trie/archiver.go
new file mode 100644
index 0000000000..32b2eae711
--- /dev/null
+++ b/trie/archiver.go
@@ -0,0 +1,599 @@
+// Copyright 2026 go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see .
+
+package trie
+
+import (
+ "encoding/binary"
+ "fmt"
+ "time"
+
+ "github.com/ethereum/go-ethereum/common"
+ "github.com/ethereum/go-ethereum/core/rawdb"
+ "github.com/ethereum/go-ethereum/core/types"
+ "github.com/ethereum/go-ethereum/ethdb"
+ "github.com/ethereum/go-ethereum/log"
+ "github.com/ethereum/go-ethereum/rlp"
+ "github.com/ethereum/go-ethereum/trie/archive"
+ "github.com/ethereum/go-ethereum/triedb/database"
+)
+
+// subtreeInfo holds information about a subtree to be archived.
+// It contains all the data needed to write the subtree to an archive
+// and replace it with an expiredNode in the database.
+type subtreeInfo struct {
+ path []byte // Hex-encoded path to subtree root
+ owner common.Hash // Zero for account trie, account hash for storage
+ height int // Height of subtree (from leaves)
+ leaves []*archive.Record // All leaf records (relative path + encoded node)
+ nodePaths [][]byte // Paths of all nodes to delete
+ rootHash common.Hash // Hash of the original subtree root (for verification)
+}
+
+// Archiver handles the archival process of trie nodes.
+// It walks the state trie, identifies subtrees at height 3,
+// archives their leaf data, and replaces them with expiredNode markers.
+//
+// The archiver uses a streaming approach: it walks the trie using a
+// NodeIterator, probes each node's height via bounded raw DB reads,
+// and archives subtrees immediately when found. This keeps memory
+// usage proportional to the iterator stack depth + the current subtree
+// being processed, rather than loading the entire trie into memory.
+type Archiver struct {
+ db ethdb.Database
+ triedb database.NodeDatabase
+ writer *archive.ArchiveWriter
+ compactionInterval uint64
+ dryRun bool
+ stateRoot common.Hash
+
+ // Progress tracking
+ subtreesArchived uint64
+ bytesDeleted uint64
+ leavesArchived uint64
+ lastCompaction uint64
+}
+
+// NewArchiver creates a new archiver instance.
+//
+// Parameters:
+// - db: The underlying key-value database
+// - triedb: The trie database for reading nodes
+// - writer: Archive file writer (can be nil for dry run)
+// - compactionInterval: Run compaction after this many subtrees (0 = disable)
+// - dryRun: If true, don't modify the database
+func NewArchiver(db ethdb.Database, triedb database.NodeDatabase,
+ writer *archive.ArchiveWriter, compactionInterval uint64, dryRun bool) *Archiver {
+ return &Archiver{
+ db: db,
+ triedb: triedb,
+ writer: writer,
+ compactionInterval: compactionInterval,
+ dryRun: dryRun,
+ }
+}
+
+// ProcessState archives subtrees from the given state root.
+// It processes storage tries first, then the account trie.
+func (a *Archiver) ProcessState(root common.Hash) error {
+ a.stateRoot = root
+
+ accountTrie, err := New(StateTrieID(root), a.triedb)
+ if err != nil {
+ return fmt.Errorf("failed to open account trie: %w", err)
+ }
+
+ log.Info("Processing storage tries")
+ iter, err := accountTrie.NodeIterator(nil)
+ if err != nil {
+ return fmt.Errorf("failed to create account iterator: %w", err)
+ }
+
+ kvIter := NewIterator(iter)
+ for kvIter.Next() {
+ // Decode the account to check for storage
+ var acc types.StateAccount
+ if err := rlp.DecodeBytes(kvIter.Value, &acc); err != nil {
+ log.Warn("Failed to decode account", "err", err)
+ continue
+ }
+ if acc.Root == types.EmptyRootHash {
+ continue
+ }
+
+ // Process this account's storage trie
+ accountHash := common.BytesToHash(kvIter.Key)
+ storageID := StorageTrieID(root, accountHash, acc.Root)
+ storageTrie, err := New(storageID, a.triedb)
+ if err != nil {
+ log.Warn("Failed to open storage trie", "account", accountHash, "err", err)
+ continue
+ }
+
+ if err := a.processTrie(accountHash, storageTrie); err != nil {
+ log.Warn("Failed to process storage trie", "account", accountHash, "err", err)
+ }
+ }
+
+ if kvIter.Err != nil {
+ return fmt.Errorf("account iteration error: %w", kvIter.Err)
+ }
+
+ log.Info("Processing account trie", "root", root)
+ if err := a.processTrie(common.Hash{}, accountTrie); err != nil {
+ return fmt.Errorf("failed to process account trie: %w", err)
+ }
+
+ return nil
+}
+
+// processTrie finds and archives all height-3 subtrees in the trie using
+// a streaming approach. It walks the trie with a NodeIterator, probes each
+// node's height via bounded raw DB reads, and archives subtrees immediately.
+//
+// Memory usage is O(iterator_stack_depth + current_subtree_size) instead of
+// O(entire_trie) as with the previous recursive approach.
+func (a *Archiver) processTrie(owner common.Hash, t *Trie) error {
+ if t.root == nil {
+ return nil
+ }
+
+ iter, err := t.NodeIterator(nil)
+ if err != nil {
+ return fmt.Errorf("failed to create node iterator: %w", err)
+ }
+
+ var (
+ lastLog = time.Now()
+ found uint64
+ )
+
+ for iter.Next(true) {
+ if iter.Leaf() {
+ continue
+ }
+
+ // Progress logging
+ if time.Since(lastLog) > 30*time.Second {
+ log.Info("Scanning trie for subtrees",
+ "owner", owner,
+ "path", common.Bytes2Hex(iter.Path()),
+ "found", found,
+ "archived", a.subtreesArchived)
+ lastLog = time.Now()
+ }
+
+ path := copyBytes(iter.Path())
+ hash := iter.Hash()
+ if hash == (common.Hash{}) {
+ // Embedded node (no hash), skip — it will be part of a
+ // parent subtree.
+ continue
+ }
+
+ // Probe subtree height via bounded raw DB reads.
+ // This does NOT load the trie into memory — it reads blobs from
+ // the DB, decodes them, computes height, and discards them.
+ height := a.probeHeight(owner, path, hash, 3)
+ if height != 3 {
+ // Too small to archive; the iterator will visit children.
+ // Too tall — descend into children to find height-3 subtrees.
+ continue
+ }
+
+ // height == 3: collect and archive this subtree immediately.
+ info := a.collectSubtree(owner, path, hash)
+ if info == nil {
+ continue
+ }
+ found++
+
+ if err := a.archiveSubtree(info); err != nil {
+ log.Warn("Failed to archive subtree", "path", common.Bytes2Hex(path), "err", err)
+ continue
+ }
+ a.subtreesArchived++
+ a.leavesArchived += uint64(len(info.leaves))
+
+ if err := a.maybeCompact(); err != nil {
+ log.Warn("Compaction failed", "err", err)
+ }
+
+ // Skip children — they're now archived.
+ // We call Next(false) to move past the subtree without descending.
+ iter.Next(false)
+ }
+
+ if iter.Error() != nil {
+ return fmt.Errorf("iterator error: %w", iter.Error())
+ }
+
+ log.Info("Found subtrees to archive", "owner", owner, "count", found)
+ return nil
+}
+
+// probeHeight computes the height of a node by reading from the raw DB.
+// It stops early once height exceeds maxHeight (returns maxHeight+1).
+// The decoded nodes are not retained — they are discarded after inspection.
+//
+// Height is measured from leaves: leaves=0, their parents=1, etc.
+func (a *Archiver) probeHeight(owner common.Hash, path []byte, hash common.Hash, maxHeight int) int {
+ blob := a.readNodeBlob(owner, path)
+ if len(blob) == 0 {
+ return 0
+ }
+
+ // Already expired — skip.
+ if blob[0] == expiredNodeMarker {
+ return -1
+ }
+
+ n, err := decodeNodeUnsafe(hash[:], blob)
+ if err != nil {
+ return 0
+ }
+
+ return a.nodeHeight(n, path, owner, maxHeight)
+}
+
+// nodeHeight computes the height of a decoded node, bounded by maxHeight.
+// Returns maxHeight+1 early if the subtree is taller than maxHeight.
+func (a *Archiver) nodeHeight(n node, path []byte, owner common.Hash, maxHeight int) int {
+ switch n := n.(type) {
+ case nil:
+ return 0
+
+ case valueNode:
+ return 0
+
+ case *shortNode:
+ childPath := append(append([]byte{}, path...), n.Key...)
+ switch child := n.Val.(type) {
+ case valueNode:
+ return 1 // shortNode → leaf
+ case hashNode:
+ if maxHeight <= 1 {
+ return maxHeight + 1
+ }
+ childHeight := a.probeHeight(owner, childPath, common.BytesToHash(child), maxHeight-1)
+ if childHeight < 0 {
+ return -1 // expired child
+ }
+ return childHeight + 1
+ default:
+ // Inline node
+ childHeight := a.nodeHeight(child, childPath, owner, maxHeight-1)
+ if childHeight < 0 {
+ return -1
+ }
+ return childHeight + 1
+ }
+
+ case *fullNode:
+ maxH := 0
+ for i, child := range n.Children[:16] {
+ if child == nil {
+ continue
+ }
+ childPath := append(append([]byte{}, path...), byte(i))
+ var childHeight int
+ switch c := child.(type) {
+ case valueNode:
+ childHeight = 0
+ case hashNode:
+ if maxH+1 > maxHeight {
+ return maxHeight + 1
+ }
+ childHeight = a.probeHeight(owner, childPath, common.BytesToHash(c), maxHeight-1)
+ default:
+ childHeight = a.nodeHeight(c, childPath, owner, maxHeight-1)
+ }
+ if childHeight < 0 {
+ continue // expired child, skip
+ }
+ h := childHeight + 1
+ if h > maxH {
+ maxH = h
+ }
+ if maxH > maxHeight {
+ return maxHeight + 1
+ }
+ }
+ return maxH
+
+ case hashNode:
+ return a.probeHeight(owner, path, common.BytesToHash(n), maxHeight)
+
+ case *expiredNode:
+ return -1
+ }
+ return 0
+}
+
+// collectSubtree reads a height-3 subtree from the raw DB and collects its
+// leaves and node paths for archival. The subtree is bounded (height ≤ 3),
+// so memory usage is limited.
+func (a *Archiver) collectSubtree(owner common.Hash, path []byte, hash common.Hash) *subtreeInfo {
+ blob := a.readNodeBlob(owner, path)
+ if len(blob) == 0 {
+ return nil
+ }
+ if blob[0] == expiredNodeMarker {
+ return nil
+ }
+
+ n, err := decodeNodeUnsafe(hash[:], blob)
+ if err != nil {
+ log.Warn("Failed to decode node for collection", "path", common.Bytes2Hex(path), "err", err)
+ return nil
+ }
+
+ info := &subtreeInfo{
+ path: copyBytes(path),
+ owner: owner,
+ rootHash: hash,
+ }
+
+ leaves, nodePaths, height, err := a.collectNodeLeaves(n, path, nil, owner)
+ if err != nil {
+ log.Warn("Failed to collect subtree leaves", "path", common.Bytes2Hex(path), "err", err)
+ return nil
+ }
+
+ info.height = height
+ info.leaves = leaves
+ info.nodePaths = append([][]byte{copyBytes(path)}, nodePaths...)
+ return info
+}
+
+// collectNodeLeaves recursively collects all leaves and node paths in a
+// bounded subtree. relPath is the path relative to the subtree root.
+// Returns (leaves, nodePaths, height, error).
+func (a *Archiver) collectNodeLeaves(n node, absPath, relPath []byte, owner common.Hash) ([]*archive.Record, [][]byte, int, error) {
+ switch n := n.(type) {
+ case nil:
+ return nil, nil, 0, nil
+
+ case valueNode:
+ return []*archive.Record{{
+ Path: copyBytes(relPath),
+ Value: []byte(n),
+ }}, nil, 0, nil
+
+ case *shortNode:
+ childAbsPath := append(append([]byte{}, absPath...), n.Key...)
+ var childNode node
+ switch c := n.Val.(type) {
+ case hashNode:
+ resolved, err := a.resolveRawNode(owner, childAbsPath, common.BytesToHash(c))
+ if err != nil {
+ return nil, nil, 0, fmt.Errorf("resolve shortNode child at %s: %w", common.Bytes2Hex(childAbsPath), err)
+ }
+ childNode = resolved
+ default:
+ childNode = c
+ }
+
+ // Pass nil relPath to child — we prepend the key ourselves
+ leaves, nodePaths, height, err := a.collectNodeLeaves(childNode, childAbsPath, nil, owner)
+ if err != nil {
+ return nil, nil, 0, err
+ }
+
+ // Prepend [relPath + extension key] to leaf relative paths
+ prefix := append(append([]byte{}, relPath...), n.Key...)
+ for _, leaf := range leaves {
+ leaf.Path = append(append([]byte{}, prefix...), leaf.Path...)
+ }
+
+ return leaves, append([][]byte{copyBytes(absPath)}, nodePaths...), height + 1, nil
+
+ case *fullNode:
+ var (
+ allLeaves []*archive.Record
+ allPaths [][]byte
+ maxHeight int
+ )
+ for i, child := range n.Children[:16] {
+ if child == nil {
+ continue
+ }
+ childAbsPath := append(append([]byte{}, absPath...), byte(i))
+
+ var childNode node
+ switch c := child.(type) {
+ case hashNode:
+ resolved, err := a.resolveRawNode(owner, childAbsPath, common.BytesToHash(c))
+ if err != nil {
+ return nil, nil, 0, fmt.Errorf("resolve fullNode child[%x] at %s: %w", i, common.Bytes2Hex(childAbsPath), err)
+ }
+ childNode = resolved
+ default:
+ childNode = c
+ }
+
+ // Pass nil relPath to child — we prepend the index ourselves
+ leaves, nodePaths, height, err := a.collectNodeLeaves(childNode, childAbsPath, nil, owner)
+ if err != nil {
+ return nil, nil, 0, err
+ }
+
+ // Prepend [relPath + branch index] to leaf relative paths
+ prefix := append(append([]byte{}, relPath...), byte(i))
+ for _, leaf := range leaves {
+ leaf.Path = append(append([]byte{}, prefix...), leaf.Path...)
+ }
+
+ allLeaves = append(allLeaves, leaves...)
+ allPaths = append(allPaths, nodePaths...)
+ h := height + 1
+ if h > maxHeight {
+ maxHeight = h
+ }
+ }
+ return allLeaves, allPaths, maxHeight, nil
+
+ case hashNode:
+ resolved, err := a.resolveRawNode(owner, absPath, common.BytesToHash(n))
+ if err != nil {
+ return nil, nil, 0, err
+ }
+ return a.collectNodeLeaves(resolved, absPath, relPath, owner)
+
+ case *expiredNode:
+ return nil, nil, 0, nil
+ }
+ return nil, nil, 0, nil
+}
+
+// readNodeBlob reads a trie node blob directly from the raw key-value
+// database, bypassing pathdb layers.
+func (a *Archiver) readNodeBlob(owner common.Hash, path []byte) []byte {
+ if owner == (common.Hash{}) {
+ return rawdb.ReadAccountTrieNode(a.db, path)
+ }
+ return rawdb.ReadStorageTrieNode(a.db, owner, path)
+}
+
+// resolveRawNode reads and decodes a trie node directly from the raw DB.
+// Unlike resolveNode, this does NOT use the trie database (no caching,
+// no diff layers). The decoded node is ephemeral and will be GC'd after use.
+func (a *Archiver) resolveRawNode(owner common.Hash, path []byte, hash common.Hash) (node, error) {
+ blob := a.readNodeBlob(owner, path)
+ if len(blob) == 0 {
+ return nil, fmt.Errorf("node not found: owner=%s path=%s", owner, common.Bytes2Hex(path))
+ }
+ if blob[0] == expiredNodeMarker {
+ return &expiredNode{}, nil
+ }
+ return decodeNodeUnsafe(hash[:], blob)
+}
+
+// archiveSubtree writes leaves to archive and replaces subtree with expiredNode.
+func (a *Archiver) archiveSubtree(info *subtreeInfo) error {
+ if a.dryRun {
+ log.Info("Would archive subtree",
+ "path", common.Bytes2Hex(info.path),
+ "owner", info.owner,
+ "height", info.height,
+ "leaves", len(info.leaves),
+ "nodes", len(info.nodePaths))
+ return nil
+ }
+
+ // 1. Write to archive file
+ offset, size, err := a.writer.WriteSubtree(info.leaves)
+ if err != nil {
+ return fmt.Errorf("failed to write subtree to archive: %w", err)
+ }
+
+ // 2. Sync to ensure durability before modifying DB
+ if err := a.writer.Sync(); err != nil {
+ return fmt.Errorf("failed to sync archive: %w", err)
+ }
+
+ // 3. Verify archive round-trip: reconstruct trie from records and
+ // check that the hash matches the original subtree root. This
+ // catches any data corruption before we delete the original nodes.
+ if info.rootHash != (common.Hash{}) {
+ reconstructed, err := archiveRecordsToNode(info.leaves)
+ if err != nil {
+ return fmt.Errorf("archive verification failed: cannot reconstruct trie from records: %w", err)
+ }
+ h := newHasher(false)
+ gotHash := common.BytesToHash(h.hash(reconstructed, true))
+ returnHasherToPool(h)
+ if gotHash != info.rootHash {
+ return fmt.Errorf("archive verification failed: hash mismatch at path %s owner %s: got %s want %s (leaves=%d offset=%d size=%d)",
+ common.Bytes2Hex(info.path), info.owner, gotHash, info.rootHash,
+ len(info.leaves), offset, size)
+ }
+ }
+
+ // 4. Batch database operations
+ batch := a.db.NewBatch()
+
+ // Delete all nodes in subtree (except the root which we'll overwrite)
+ for _, nodePath := range info.nodePaths[1:] { // Skip first (root)
+ if info.owner == (common.Hash{}) {
+ rawdb.DeleteAccountTrieNode(batch, nodePath)
+ } else {
+ rawdb.DeleteStorageTrieNode(batch, info.owner, nodePath)
+ }
+ a.bytesDeleted += uint64(len(nodePath))
+ }
+
+ // Write expiredNode at subtree root
+ expiredBlob := encodeExpiredNodeBlob(offset, size)
+ if info.owner == (common.Hash{}) {
+ rawdb.WriteAccountTrieNode(batch, info.path, expiredBlob)
+ } else {
+ rawdb.WriteStorageTrieNode(batch, info.owner, info.path, expiredBlob)
+ }
+
+ if err := batch.Write(); err != nil {
+ return fmt.Errorf("failed to write batch: %w", err)
+ }
+
+ log.Debug("Archived subtree",
+ "path", common.Bytes2Hex(info.path),
+ "owner", info.owner,
+ "leaves", len(info.leaves),
+ "offset", offset,
+ "size", size)
+
+ return nil
+}
+
+// maybeCompact runs database compaction if the threshold is reached.
+func (a *Archiver) maybeCompact() error {
+ if a.compactionInterval == 0 {
+ return nil
+ }
+ if a.subtreesArchived-a.lastCompaction >= a.compactionInterval {
+ log.Info("Running database compaction", "subtrees", a.subtreesArchived)
+ if err := a.db.Compact(nil, nil); err != nil {
+ return err
+ }
+ a.lastCompaction = a.subtreesArchived
+ }
+ return nil
+}
+
+// encodeExpiredNodeBlob creates the raw bytes for an expiredNode.
+// Format: 1-byte marker (0x00) + 8-byte offset + 8-byte size = 17 bytes
+func encodeExpiredNodeBlob(offset, size uint64) []byte {
+ buf := make([]byte, 1+2*archive.OffsetSize) // 17 bytes
+ buf[0] = expiredNodeMarker // 0x00
+ binary.BigEndian.PutUint64(buf[1:], offset)
+ binary.BigEndian.PutUint64(buf[1+archive.OffsetSize:], size)
+ return buf
+}
+
+// Stats returns archival statistics.
+func (a *Archiver) Stats() (subtrees, leaves, bytesDeleted uint64) {
+ return a.subtreesArchived, a.leavesArchived, a.bytesDeleted
+}
+
+// copyBytes returns a copy of the given byte slice.
+func copyBytes(b []byte) []byte {
+ if b == nil {
+ return nil
+ }
+ c := make([]byte, len(b))
+ copy(c, b)
+ return c
+}
diff --git a/trie/committer.go b/trie/committer.go
index 2a2142e0ff..7ea4e690cf 100644
--- a/trie/committer.go
+++ b/trie/committer.go
@@ -79,6 +79,8 @@ func (c *committer) commit(path []byte, n node, parallel bool) node {
return cn
case hashNode:
return cn
+ case *expiredNode:
+ return cn
default:
// nil, valuenode shouldn't be committed
panic(fmt.Sprintf("%T: invalid node: %v", n, n))
diff --git a/trie/expired_node.go b/trie/expired_node.go
new file mode 100644
index 0000000000..ce622daa03
--- /dev/null
+++ b/trie/expired_node.go
@@ -0,0 +1,262 @@
+// Copyright 2026 go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see .
+
+package trie
+
+import (
+ "bytes"
+ "encoding/binary"
+ "fmt"
+ "time"
+
+ "github.com/ethereum/go-ethereum/log"
+ "github.com/ethereum/go-ethereum/rlp"
+ "github.com/ethereum/go-ethereum/trie/archive"
+)
+
+// expiredNodeMarker is a special marker byte to identify expired nodes.
+// Using 0x00 as a marker since valid MPT nodes are always RLP lists (starting with 0xc0+).
+const expiredNodeMarker = 0x00
+
+// expiredNode represents a node whose data has been archived.
+// It stores the file offset and size of the archived data.
+type expiredNode struct {
+ offset uint64
+ size uint64
+ cachedHash hashNode
+ archiveResolver archive.ResolverFn
+}
+
+func (n *expiredNode) cache() (hashNode, bool) {
+ return n.cachedHash, n.cachedHash == nil
+}
+
+func (n *expiredNode) encode(w rlp.EncoderBuffer) {
+ var buf [1 + 2*archive.OffsetSize]byte
+ buf[0] = expiredNodeMarker
+ binary.BigEndian.PutUint64(buf[1:], n.offset)
+ binary.BigEndian.PutUint64(buf[1+archive.OffsetSize:], n.size)
+ w.Write(buf[:])
+}
+
+func (n *expiredNode) fstring(ind string) string {
+ return fmt.Sprintf(" ", n.offset, n.size)
+}
+
+// Offset returns the archive file offset for this expired node.
+func (n *expiredNode) Offset() uint64 {
+ return n.offset
+}
+
+// SetArchiveResolver sets the resolver function for this expired node.
+func (n *expiredNode) SetArchiveResolver(resolver archive.ResolverFn) {
+ n.archiveResolver = resolver
+}
+
+// resolveExpiredNodeData resolves an expired node from the archive, verifies
+// the reconstructed subtree hash, and stamps the cached hash onto the root.
+// Returns an error if the archive data is corrupted (hash mismatch).
+func resolveExpiredNodeData(n *expiredNode) (node, error) {
+ start := time.Now()
+ records, err := archive.ArchivedNodeResolver(n.offset, n.size)
+ if err != nil {
+ return nil, fmt.Errorf("failed to resolve expired node: %w", err)
+ }
+ resolved, err := archiveRecordsToNode(records)
+ if err != nil {
+ return nil, fmt.Errorf("failed to rebuild expired node from archive: %w", err)
+ }
+ depth := subtreeDepth(resolved)
+ log.Debug("Resurrected expired node from archive",
+ "offset", n.offset, "archiveBytes", n.size,
+ "records", len(records), "depth", depth,
+ "elapsed", time.Since(start))
+ // Verify hash integrity: if the original hash is known, check that the
+ // reconstructed subtree produces the same hash. A mismatch means the
+ // archive is corrupted (e.g. missing leaves due to unresolvable hashNodes
+ // during archival) and any data from it is unreliable.
+ if n.cachedHash != nil {
+ h := newHasher(false)
+ gotHash := h.hash(resolved, true)
+ returnHasherToPool(h)
+ if !bytes.Equal(gotHash, n.cachedHash) {
+ return nil, fmt.Errorf("expired node hash mismatch at offset=%d size=%d: archive data is corrupted (expected %x got %x, %d records)",
+ n.offset, n.size, []byte(n.cachedHash), gotHash, len(records))
+ }
+ // Stamp the original hash onto the resolved subtree root so the
+ // hasher returns it directly instead of re-computing.
+ switch nn := resolved.(type) {
+ case *fullNode:
+ nn.flags.hash = n.cachedHash
+ case *shortNode:
+ nn.flags.hash = n.cachedHash
+ }
+ }
+ // Mark the entire resolved subtree as dirty. This is critical for
+ // correctness with pathdb's diff layer model: when a trie with expired
+ // nodes is modified and committed, the committer only captures dirty
+ // nodes into the NodeSet (which becomes the diff layer). Without this
+ // marking, resolved-but-unmodified sibling nodes within the subtree
+ // would exist nowhere — not in any diff layer (they're clean) and not
+ // in the raw DB (the archiver deleted them). Subsequent trie accesses
+ // from higher diff layers would fall through to the disk layer, find
+ // nothing, and produce MissingNodeError.
+ //
+ // For read-only tries (only get operations, no commit), this dirty
+ // marking is harmless — the nodes are discarded when the trie is GC'd.
+ markSubtreeDirty(resolved)
+ return resolved, nil
+}
+
+// subtreeDepth returns the maximum depth of a trie subtree.
+func subtreeDepth(n node) int {
+ switch n := n.(type) {
+ case *fullNode:
+ max := 0
+ for _, child := range &n.Children {
+ if child != nil {
+ if d := subtreeDepth(child); d > max {
+ max = d
+ }
+ }
+ }
+ return 1 + max
+ case *shortNode:
+ return 1 + subtreeDepth(n.Val)
+ default:
+ return 0
+ }
+}
+
+// markSubtreeDirty recursively marks all fullNode and shortNode in the
+// subtree as dirty, preserving any cached hashes. This ensures the
+// committer will capture them in the NodeSet during trie commit.
+func markSubtreeDirty(n node) {
+ switch n := n.(type) {
+ case *fullNode:
+ n.flags.dirty = true
+ for _, child := range n.Children[:16] {
+ if child != nil {
+ markSubtreeDirty(child)
+ }
+ }
+ case *shortNode:
+ n.flags.dirty = true
+ markSubtreeDirty(n.Val)
+ }
+ // valueNode, hashNode, nil: no flags to mark
+}
+
+func archiveRecordsToNode(records []*archive.Record) (node, error) {
+ if len(records) == 0 {
+ return nil, archive.EmptyArchiveRecord
+ }
+
+ // Build the trie incrementally from nil to produce the canonical
+ // MPT structure. Starting with a fullNode would be wrong when the
+ // original subtree root was a shortNode (shared prefix).
+ var root node
+ for i, record := range records {
+ if err := validateRecordPath(record.Path); err != nil {
+ return nil, err
+ }
+
+ key, err := normalizeRecordKey(record.Path)
+ if err != nil {
+ return nil, err
+ }
+ if len(key) < 1 {
+ return nil, fmt.Errorf("empty key in record #%d", i)
+ }
+ root, err = insertTrieNode(root, key, valueNode(record.Value))
+ if err != nil {
+ return nil, err
+ }
+ }
+ return root, nil
+}
+
+func validateRecordPath(path []byte) error {
+ for i, b := range path {
+ if b > 16 {
+ return fmt.Errorf("invalid nibble in record path: %d", b)
+ }
+ if b == 16 && i != len(path)-1 {
+ return fmt.Errorf("terminator nibble in middle of record path")
+ }
+ }
+ return nil
+}
+
+// normalizeRecordKey ensures the record path is a hex-nibble key suitable for
+// leaf insertion by guaranteeing a single terminator nibble and preserving any
+// already-terminated path. Empty paths are normalized to a sole terminator.
+func normalizeRecordKey(path []byte) ([]byte, error) {
+ if len(path) == 0 {
+ return []byte{16}, nil
+ }
+ if hasTerm(path) {
+ return path, nil
+ }
+ key := append([]byte{}, path...)
+ key = append(key, 16)
+ return key, nil
+}
+
+func insertTrieNode(n node, key []byte, value node) (node, error) {
+ if len(key) == 0 {
+ return value, nil
+ }
+ switch n := n.(type) {
+ case *shortNode:
+ matchlen := prefixLen(key, n.Key)
+ if matchlen == len(n.Key) {
+ nn, err := insertTrieNode(n.Val, key[matchlen:], value)
+ if err != nil {
+ return nil, err
+ }
+ return &shortNode{Key: n.Key, Val: nn}, nil
+ }
+ branch := &fullNode{}
+ var err error
+ branch.Children[n.Key[matchlen]], err = insertTrieNode(nil, n.Key[matchlen+1:], n.Val)
+ if err != nil {
+ return nil, err
+ }
+ branch.Children[key[matchlen]], err = insertTrieNode(nil, key[matchlen+1:], value)
+ if err != nil {
+ return nil, err
+ }
+ if matchlen == 0 {
+ return branch, nil
+ }
+ return &shortNode{Key: key[:matchlen], Val: branch}, nil
+
+ case *fullNode:
+ child, err := insertTrieNode(n.Children[key[0]], key[1:], value)
+ if err != nil {
+ return nil, err
+ }
+ n.Children[key[0]] = child
+ return n, nil
+
+ case nil:
+ return &shortNode{Key: key, Val: value}, nil
+
+ default:
+ return nil, fmt.Errorf("invalid node type in trie insert: %T", n)
+ }
+}
diff --git a/trie/expired_node_test.go b/trie/expired_node_test.go
new file mode 100644
index 0000000000..40b3b056b4
--- /dev/null
+++ b/trie/expired_node_test.go
@@ -0,0 +1,601 @@
+// Copyright 2026 go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see .
+
+package trie
+
+import (
+ "bytes"
+ "errors"
+ "os"
+ "path/filepath"
+ "testing"
+
+ "github.com/ethereum/go-ethereum/rlp"
+ "github.com/ethereum/go-ethereum/trie/archive"
+)
+
+// setupTestArchive creates a temporary archive directory with an archive file
+// containing the given records, and configures archive.ArchiveDataDir to point
+// to it. It returns the offset and size of the written data, and a cleanup function.
+func setupTestArchive(t *testing.T, records []*archive.Record) (offset, size uint64, cleanup func()) {
+ t.Helper()
+ tmpDir := t.TempDir()
+ gethDir := filepath.Join(tmpDir, "geth")
+ if err := os.MkdirAll(gethDir, 0755); err != nil {
+ t.Fatal(err)
+ }
+
+ writer, err := archive.NewArchiveWriter(filepath.Join(gethDir, "nodearchive"))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ offset, size, err = writer.WriteSubtree(records)
+ if err != nil {
+ writer.Close()
+ t.Fatal(err)
+ }
+ writer.Close()
+
+ oldDir := archive.ArchiveDataDir
+ archive.ArchiveDataDir = tmpDir
+
+ return offset, size, func() {
+ archive.ArchiveDataDir = oldDir
+ }
+}
+
+func TestExpiredNodeEncodeDecode(t *testing.T) {
+ testCases := []struct {
+ offset uint64
+ size uint64
+ }{
+ {0, 0},
+ {1, 100},
+ {255, 1024},
+ {256, 4096},
+ {1 << 16, 1 << 20},
+ {1 << 32, 1 << 32},
+ {1<<64 - 1, 1<<64 - 1},
+ }
+
+ for _, tc := range testCases {
+ original := &expiredNode{offset: tc.offset, size: tc.size}
+
+ w := rlp.NewEncoderBuffer(nil)
+ original.encode(w)
+ encoded := w.ToBytes()
+ w.Flush()
+
+ decoded, err := decodeNodeUnsafe(nil, encoded)
+ if err != nil {
+ t.Fatalf("failed to decode expired node with offset %d, size %d: %v", tc.offset, tc.size, err)
+ }
+
+ expNode, ok := decoded.(*expiredNode)
+ if !ok {
+ t.Fatalf("decoded node is not an expired node, got %T", decoded)
+ }
+
+ if expNode.offset != original.offset {
+ t.Errorf("offset mismatch: got %d, want %d", expNode.offset, original.offset)
+ }
+ if expNode.size != original.size {
+ t.Errorf("size mismatch: got %d, want %d", expNode.size, original.size)
+ }
+ }
+}
+
+func TestExpiredNodeEncodedFormat(t *testing.T) {
+ node := &expiredNode{offset: 0x0102030405060708, size: 0x1112131415161718}
+
+ w := rlp.NewEncoderBuffer(nil)
+ node.encode(w)
+ encoded := w.ToBytes()
+ w.Flush()
+
+ expected := []byte{
+ 0x00,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+ }
+ if !bytes.Equal(encoded, expected) {
+ t.Errorf("encoded format mismatch: got %x, want %x", encoded, expected)
+ }
+}
+
+func TestExpiredNodeFstring(t *testing.T) {
+ node := &expiredNode{offset: 12345, size: 6789}
+ s := node.fstring("")
+ if s != " " {
+ t.Errorf("fstring mismatch: got %q", s)
+ }
+}
+
+func TestExpiredNodeCache(t *testing.T) {
+ node := &expiredNode{offset: 100}
+ hash, dirty := node.cache()
+ if hash != nil {
+ t.Error("expected nil hash from expired node cache")
+ }
+ if !dirty {
+ t.Error("expected dirty=true from expired node cache")
+ }
+}
+
+func TestExpiredNodeInvalidLength(t *testing.T) {
+ invalidCases := [][]byte{
+ {0x00},
+ {0x00, 0x01},
+ {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08},
+ {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f},
+ {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11},
+ }
+
+ for _, buf := range invalidCases {
+ _, err := decodeNodeUnsafe(nil, buf)
+ if err == nil {
+ t.Errorf("expected error for buffer length %d, got nil", len(buf))
+ }
+ }
+}
+
+func TestExpiredNodeNoArchiveFile(t *testing.T) {
+ // When no archive file exists, Get should return an error
+ tmpDir := t.TempDir()
+ gethDir := filepath.Join(tmpDir, "geth")
+ if err := os.MkdirAll(gethDir, 0755); err != nil {
+ t.Fatal(err)
+ }
+
+ oldDir := archive.ArchiveDataDir
+ archive.ArchiveDataDir = tmpDir
+ defer func() { archive.ArchiveDataDir = oldDir }()
+
+ tr := NewEmpty(nil)
+ tr.root = &expiredNode{offset: 100, size: 50}
+
+ _, err := tr.Get([]byte("key"))
+ if err == nil {
+ t.Error("expected error when archive file doesn't exist")
+ }
+}
+
+func TestExpiredNodeWithResolver(t *testing.T) {
+ records := []*archive.Record{
+ {Path: []byte{0x01, 0x02, 16}, Value: []byte("testvalue")},
+ }
+ offset, size, cleanup := setupTestArchive(t, records)
+ defer cleanup()
+
+ tr := NewEmpty(nil)
+ tr.root = &expiredNode{offset: offset, size: size}
+
+ val, err := tr.Get([]byte{0x12})
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if string(val) != "testvalue" {
+ t.Errorf("value mismatch: got %q, want %q", val, "testvalue")
+ }
+}
+
+func TestExpiredNodeCopy(t *testing.T) {
+ original := &expiredNode{
+ offset: 12345,
+ size: 6789,
+ archiveResolver: archive.ArchivedNodeResolver,
+ }
+
+ copied := copyNode(original)
+ copiedExp, ok := copied.(*expiredNode)
+ if !ok {
+ t.Fatalf("copied node is not an expired node, got %T", copied)
+ }
+
+ if copiedExp.offset != original.offset {
+ t.Errorf("offset mismatch: got %d, want %d", copiedExp.offset, original.offset)
+ }
+
+ if copiedExp.size != original.size {
+ t.Errorf("size mismatch: got %d, want %d", copiedExp.size, original.size)
+ }
+
+ if copiedExp.archiveResolver == nil {
+ t.Error("archive resolver was not copied")
+ }
+}
+
+func TestArchiveRecordsToNodeEmpty(t *testing.T) {
+ _, err := archiveRecordsToNode([]*archive.Record{})
+ if !errors.Is(err, archive.EmptyArchiveRecord) {
+ t.Errorf("expected EmptyArchiveRecord error, got %v", err)
+ }
+
+ _, err = archiveRecordsToNode(nil)
+ if !errors.Is(err, archive.EmptyArchiveRecord) {
+ t.Errorf("expected EmptyArchiveRecord error for nil slice, got %v", err)
+ }
+}
+
+func TestArchiveRecordsToNodeMultiple(t *testing.T) {
+ records := []*archive.Record{
+ {Path: []byte{0x01, 16}, Value: []byte("value1")},
+ {Path: []byte{0x02, 16}, Value: []byte("value2")},
+ }
+
+ node, err := archiveRecordsToNode(records)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+
+ fn, ok := node.(*fullNode)
+ if !ok {
+ t.Fatalf("expected fullNode, got %T", node)
+ }
+
+ if fn.Children[0x01] == nil {
+ t.Error("expected child at index 0x01")
+ }
+ if fn.Children[0x02] == nil {
+ t.Error("expected child at index 0x02")
+ }
+}
+
+func TestExpiredNodeGetMultipleRecords(t *testing.T) {
+ records := []*archive.Record{
+ {Path: []byte{0x01, 0x02, 16}, Value: []byte("value1")},
+ {Path: []byte{0x04, 0x05, 16}, Value: []byte("value2")},
+ }
+ offset, size, cleanup := setupTestArchive(t, records)
+ defer cleanup()
+
+ tr := NewEmpty(nil)
+ tr.root = &expiredNode{offset: offset, size: size}
+
+ val, err := tr.Get([]byte{0x12})
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if string(val) != "value1" {
+ t.Errorf("value mismatch: got %q, want %q", val, "value1")
+ }
+
+ tr2 := NewEmpty(nil)
+ tr2.root = &expiredNode{offset: offset, size: size}
+
+ val2, err := tr2.Get([]byte{0x45})
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if string(val2) != "value2" {
+ t.Errorf("value mismatch: got %q, want %q", val2, "value2")
+ }
+}
+
+func TestExpiredNodeGetKeyNotFound(t *testing.T) {
+ records := []*archive.Record{
+ {Path: []byte{0x01, 0x02, 16}, Value: []byte("value1")},
+ }
+ offset, size, cleanup := setupTestArchive(t, records)
+ defer cleanup()
+
+ tr := NewEmpty(nil)
+ tr.root = &expiredNode{offset: offset, size: size}
+
+ val, err := tr.Get([]byte{0xff, 0xff})
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if val != nil {
+ t.Errorf("expected nil value for non-existent key, got %q", val)
+ }
+}
+
+func TestExpiredNodeGetPathMismatch(t *testing.T) {
+ records := []*archive.Record{
+ {Path: []byte{0x01, 0x02, 16}, Value: []byte("testvalue")},
+ }
+ offset, size, cleanup := setupTestArchive(t, records)
+ defer cleanup()
+
+ tr := NewEmpty(nil)
+ tr.root = &expiredNode{offset: offset, size: size}
+
+ val, err := tr.Get([]byte{0x19})
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if val != nil {
+ t.Errorf("expected nil value when leaf key doesn't match, got %q", val)
+ }
+}
+
+func TestExpiredNodeInsert(t *testing.T) {
+ records := []*archive.Record{
+ {Path: []byte{0x01, 0x02, 16}, Value: []byte("existing")},
+ }
+ offset, size, cleanup := setupTestArchive(t, records)
+ defer cleanup()
+
+ tr := NewEmpty(nil)
+ tr.root = &expiredNode{offset: offset, size: size}
+
+ err := tr.Update([]byte{0x45}, []byte("newvalue"))
+ if err != nil {
+ t.Fatalf("unexpected error on insert: %v", err)
+ }
+
+ val, err := tr.Get([]byte{0x45})
+ if err != nil {
+ t.Fatalf("unexpected error on get: %v", err)
+ }
+ if string(val) != "newvalue" {
+ t.Errorf("value mismatch: got %q, want %q", val, "newvalue")
+ }
+}
+
+func TestExpiredNodeUpdate(t *testing.T) {
+ records := []*archive.Record{
+ {Path: []byte{0x01, 0x02, 16}, Value: []byte("oldvalue")},
+ }
+ offset, size, cleanup := setupTestArchive(t, records)
+ defer cleanup()
+
+ tr := NewEmpty(nil)
+ tr.root = &expiredNode{offset: offset, size: size}
+
+ err := tr.Update([]byte{0x12}, []byte("newvalue"))
+ if err != nil {
+ t.Fatalf("unexpected error on update: %v", err)
+ }
+
+ val, err := tr.Get([]byte{0x12})
+ if err != nil {
+ t.Fatalf("unexpected error on get: %v", err)
+ }
+ if string(val) != "newvalue" {
+ t.Errorf("value mismatch: got %q, want %q", val, "newvalue")
+ }
+}
+
+func TestExpiredNodeDelete(t *testing.T) {
+ records := []*archive.Record{
+ {Path: []byte{0x01, 0x02, 16}, Value: []byte("value1")},
+ {Path: []byte{0x04, 0x05, 16}, Value: []byte("value2")},
+ }
+ offset, size, cleanup := setupTestArchive(t, records)
+ defer cleanup()
+
+ tr := NewEmpty(nil)
+ tr.root = &expiredNode{offset: offset, size: size}
+
+ err := tr.Delete([]byte{0x12})
+ if err != nil {
+ t.Fatalf("unexpected error on delete: %v", err)
+ }
+
+ val, err := tr.Get([]byte{0x12})
+ if err != nil {
+ t.Fatalf("unexpected error on get after delete: %v", err)
+ }
+ if val != nil {
+ t.Errorf("expected nil after delete, got %q", val)
+ }
+
+ val2, err := tr.Get([]byte{0x45})
+ if err != nil {
+ t.Fatalf("unexpected error getting other key: %v", err)
+ }
+ if string(val2) != "value2" {
+ t.Errorf("other value should still exist: got %q, want %q", val2, "value2")
+ }
+}
+
+func TestTrieCopyPreservesArchiveResolver(t *testing.T) {
+ records := []*archive.Record{
+ {Path: []byte{0x01, 0x02, 16}, Value: []byte("testvalue")},
+ }
+ offset, size, cleanup := setupTestArchive(t, records)
+ defer cleanup()
+
+ tr := NewEmpty(nil)
+ tr.root = &expiredNode{offset: offset, size: size}
+
+ trCopy := tr.Copy()
+
+ val, err := trCopy.Get([]byte{0x12})
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if string(val) != "testvalue" {
+ t.Errorf("value mismatch: got %q, want %q", val, "testvalue")
+ }
+}
+
+func TestWalkWithExpiredNodes(t *testing.T) {
+ records := []*archive.Record{
+ {Path: []byte{0x01, 0x02, 16}, Value: []byte("value1")},
+ {Path: []byte{0x04, 0x05, 16}, Value: []byte("value2")},
+ {Path: []byte{0x07, 0x08, 16}, Value: []byte("value3")},
+ }
+ offset, size, cleanup := setupTestArchive(t, records)
+ defer cleanup()
+
+ tr := NewEmpty(nil)
+ tr.root = &expiredNode{offset: offset, size: size}
+
+ var leaves []string
+ stats, err := tr.Walk(func(path []byte, value []byte) error {
+ leaves = append(leaves, string(value))
+ return nil
+ })
+ if err != nil {
+ t.Fatalf("Walk failed: %v", err)
+ }
+ if stats.Leaves != 3 {
+ t.Errorf("expected 3 leaves, got %d", stats.Leaves)
+ }
+ if stats.ExpiredResolved != 1 {
+ t.Errorf("expected 1 expired resolved, got %d", stats.ExpiredResolved)
+ }
+ // Verify all values were visited
+ expected := map[string]bool{"value1": true, "value2": true, "value3": true}
+ for _, leaf := range leaves {
+ if !expected[leaf] {
+ t.Errorf("unexpected leaf value: %q", leaf)
+ }
+ delete(expected, leaf)
+ }
+ if len(expected) > 0 {
+ t.Errorf("missing leaves: %v", expected)
+ }
+}
+
+func TestWalkEmptyTrie(t *testing.T) {
+ tr := NewEmpty(nil)
+ stats, err := tr.Walk(func(path []byte, value []byte) error {
+ t.Error("callback should not be called for empty trie")
+ return nil
+ })
+ if err != nil {
+ t.Fatalf("Walk failed: %v", err)
+ }
+ if stats.Leaves != 0 || stats.ExpiredResolved != 0 {
+ t.Errorf("expected zero stats for empty trie, got leaves=%d expired=%d", stats.Leaves, stats.ExpiredResolved)
+ }
+}
+
+func TestWalkCallbackError(t *testing.T) {
+ records := []*archive.Record{
+ {Path: []byte{0x01, 0x02, 16}, Value: []byte("value1")},
+ }
+ offset, size, cleanup := setupTestArchive(t, records)
+ defer cleanup()
+
+ tr := NewEmpty(nil)
+ tr.root = &expiredNode{offset: offset, size: size}
+
+ testErr := errors.New("test error")
+ _, err := tr.Walk(func(path []byte, value []byte) error {
+ return testErr
+ })
+ if !errors.Is(err, testErr) {
+ t.Fatalf("expected test error, got %v", err)
+ }
+}
+
+// TestExpiredNodeResolvedSubtreeDirty verifies that when an expired node is
+// resolved and a sibling leaf is modified, the commit captures ALL resolved
+// nodes (not just the modified path). Without this fix, resolved-but-unmodified
+// nodes would be lost: not in the diff layer (clean) and not in the raw DB
+// (deleted by archiver).
+func TestExpiredNodeResolvedSubtreeDirty(t *testing.T) {
+ // Use large values (>32 bytes) so leaf nodes are NOT embedded in
+ // their parent. This matches production storage tries where
+ // intermediate nodes are large enough to be stored independently.
+ bigVal1 := bytes.Repeat([]byte("A"), 40)
+ bigVal2 := bytes.Repeat([]byte("B"), 40)
+
+ // Create an archive with records under different branches.
+ records := []*archive.Record{
+ {Path: []byte{0x01, 0x02, 16}, Value: bigVal1},
+ {Path: []byte{0x04, 0x05, 16}, Value: bigVal2},
+ }
+ offset, size, cleanup := setupTestArchive(t, records)
+ defer cleanup()
+
+ tr := NewEmpty(nil)
+ tr.root = &expiredNode{offset: offset, size: size}
+
+ // Insert a value that goes through one branch of the resolved subtree.
+ // This modifies path [1, ...] but leaves path [4, ...] unmodified.
+ if err := tr.Update([]byte{0x12}, bytes.Repeat([]byte("C"), 40)); err != nil {
+ t.Fatalf("Update failed: %v", err)
+ }
+
+ // Commit the trie. The NodeSet should be non-nil because we modified data.
+ _, nodes := tr.Commit(false)
+ if nodes == nil {
+ t.Fatal("expected non-nil NodeSet after modifying expired subtree")
+ }
+
+ // The resolved-but-unmodified sibling (path [4, 5]) should also be
+ // captured in the NodeSet, because markSubtreeDirty ensures all resolved
+ // nodes are dirty. Count the nodes to verify.
+ nodeCount := len(nodes.Nodes)
+ // We expect at least 3 nodes: the root, the modified branch, and the
+ // sibling branch. The exact count depends on trie structure.
+ if nodeCount < 3 {
+ t.Errorf("expected at least 3 nodes in NodeSet (root + modified + sibling), got %d", nodeCount)
+ }
+}
+
+// TestMarkSubtreeDirty verifies that markSubtreeDirty correctly sets the dirty
+// flag on all nodes in a subtree while preserving cached hashes.
+func TestMarkSubtreeDirty(t *testing.T) {
+ // Build a small trie structure
+ leaf1 := &shortNode{Key: []byte{1, 16}, Val: valueNode("v1")}
+ leaf2 := &shortNode{Key: []byte{2, 16}, Val: valueNode("v2")}
+ branch := &fullNode{}
+ branch.Children[1] = leaf1
+ branch.Children[2] = leaf2
+
+ // Set hash but not dirty (as if loaded from DB)
+ branch.flags = nodeFlag{hash: hashNode("testhash"), dirty: false}
+ leaf1.flags = nodeFlag{hash: hashNode("hash1"), dirty: false}
+ leaf2.flags = nodeFlag{hash: hashNode("hash2"), dirty: false}
+
+ markSubtreeDirty(branch)
+
+ // All nodes should be dirty
+ if !branch.flags.dirty {
+ t.Error("branch should be dirty")
+ }
+ if !leaf1.flags.dirty {
+ t.Error("leaf1 should be dirty")
+ }
+ if !leaf2.flags.dirty {
+ t.Error("leaf2 should be dirty")
+ }
+
+ // Hashes should be preserved
+ if !bytes.Equal(branch.flags.hash, hashNode("testhash")) {
+ t.Error("branch hash should be preserved")
+ }
+ if !bytes.Equal(leaf1.flags.hash, hashNode("hash1")) {
+ t.Error("leaf1 hash should be preserved")
+ }
+ if !bytes.Equal(leaf2.flags.hash, hashNode("hash2")) {
+ t.Error("leaf2 hash should be preserved")
+ }
+}
+
+func TestExpiredNodeGetNode(t *testing.T) {
+ records := []*archive.Record{
+ {Path: []byte{0x01, 0x02, 16}, Value: []byte("testvalue")},
+ }
+ offset, size, cleanup := setupTestArchive(t, records)
+ defer cleanup()
+
+ tr := NewEmpty(nil)
+ tr.root = &expiredNode{offset: offset, size: size}
+
+ _, _, err := tr.GetNode(hexToCompact([]byte{0x01, 0x02}))
+ if err != nil && err.Error() != "non-consensus node" {
+ t.Fatalf("unexpected error: %v", err)
+ }
+}
diff --git a/trie/hasher.go b/trie/hasher.go
index a2a1f5b662..d4376e12e2 100644
--- a/trie/hasher.go
+++ b/trie/hasher.go
@@ -18,6 +18,7 @@ package trie
import (
"bytes"
+ "encoding/binary"
"fmt"
"sync"
@@ -97,6 +98,22 @@ func (h *hasher) hash(n node, force bool) []byte {
// hash nodes don't have children, so they're left as were
return n
+ case *expiredNode:
+ // Return the original subtree hash that was cached when the
+ // expired node was decoded. The parent node references this
+ // hash, so we must return the same value to keep the Merkle
+ // root consistent.
+ if n.cachedHash != nil {
+ return n.cachedHash
+ }
+ // Fallback: hash the marker blob (should not happen in practice
+ // because decodeNodeUnsafe always provides the hash).
+ var buf [1 + 2*8]byte // 17 bytes
+ buf[0] = expiredNodeMarker
+ binary.BigEndian.PutUint64(buf[1:], n.offset)
+ binary.BigEndian.PutUint64(buf[9:], n.size)
+ return h.hashData(buf[:])
+
default:
panic(fmt.Errorf("unexpected node type, %T", n))
}
@@ -214,6 +231,12 @@ func (h *hasher) proofHash(original node) []byte {
return bytes.Clone(h.encodeShortNode(n))
case *fullNode:
return bytes.Clone(h.encodeFullNode(n))
+ case *expiredNode:
+ var buf [1 + 2*8]byte
+ buf[0] = expiredNodeMarker
+ binary.BigEndian.PutUint64(buf[1:], n.offset)
+ binary.BigEndian.PutUint64(buf[9:], n.size)
+ return buf[:]
default:
panic(fmt.Errorf("unexpected node type, %T", original))
}
diff --git a/trie/node.go b/trie/node.go
index b5094ff4b7..f9e0840c1d 100644
--- a/trie/node.go
+++ b/trie/node.go
@@ -18,13 +18,16 @@ package trie
import (
"bytes"
+ "encoding/binary"
"fmt"
"io"
"strings"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto"
+ "github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rlp"
+ "github.com/ethereum/go-ethereum/trie/archive"
)
var indices = []string{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f", "[17]"}
@@ -158,6 +161,15 @@ func decodeNodeUnsafe(hash, buf []byte) (node, error) {
if len(buf) == 0 {
return nil, io.ErrUnexpectedEOF
}
+ if buf[0] == expiredNodeMarker {
+ if len(buf) != 1+2*archive.OffsetSize {
+ return nil, fmt.Errorf("invalid expired node length: %d", len(buf))
+ }
+ offset := binary.BigEndian.Uint64(buf[1:])
+ size := binary.BigEndian.Uint64(buf[1+archive.OffsetSize:])
+ log.Debug("Decoded expired node", "offset", offset, "size", size, "hash", common.BytesToHash(hash))
+ return &expiredNode{offset: offset, size: size, cachedHash: hashNode(hash), archiveResolver: archive.ArchivedNodeResolver}, nil
+ }
elems, _, err := rlp.SplitList(buf)
if err != nil {
return nil, fmt.Errorf("decode error: %v", err)
diff --git a/trie/proof.go b/trie/proof.go
index 58075daf9b..5be05c6f81 100644
--- a/trie/proof.go
+++ b/trie/proof.go
@@ -25,6 +25,7 @@ import (
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
+ "github.com/ethereum/go-ethereum/trie/archive"
)
// Prove constructs a merkle proof for key. The result contains all encoded nodes
@@ -78,6 +79,16 @@ func (t *Trie) Prove(key []byte, proofDb ethdb.KeyValueWriter) error {
// clean cache or the database, they are all in their own
// copy and safe to use unsafe decoder.
tn = mustDecodeNodeUnsafe(n, blob)
+ case *expiredNode:
+ records, err := archive.ArchivedNodeResolver(n.offset, n.size)
+ if err != nil {
+ return fmt.Errorf("failed to resolve expired node in proof: %w", err)
+ }
+ resolved, err := archiveRecordsToNode(records)
+ if err != nil {
+ return fmt.Errorf("failed to rebuild expired node in proof: %w", err)
+ }
+ tn = resolved
default:
panic(fmt.Sprintf("%T: invalid node: %v", tn, tn))
}
@@ -617,6 +628,8 @@ func get(tn node, key []byte, skipResolved bool) ([]byte, node) {
}
case hashNode:
return key, n
+ case *expiredNode:
+ return key, n
case nil:
return key, nil
case valueNode:
diff --git a/trie/trie.go b/trie/trie.go
index 1ef2c2f1a6..69db68b515 100644
--- a/trie/trie.go
+++ b/trie/trie.go
@@ -26,6 +26,7 @@ import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/log"
+ "github.com/ethereum/go-ethereum/trie/archive"
"github.com/ethereum/go-ethereum/trie/trienode"
"github.com/ethereum/go-ethereum/triedb/database"
"golang.org/x/sync/errgroup"
@@ -57,6 +58,10 @@ type Trie struct {
// reader is the handler trie can retrieve nodes from.
reader *Reader
+ // archiveResolver is an optional callback to resolve expired nodes from
+ // an archive file.
+ archiveResolver archive.ResolverFn
+
// Various tracers for capturing the modifications to trie
opTracer *opTracer
prevalueTracer *PrevalueTracer
@@ -70,17 +75,23 @@ func (t *Trie) newFlag() nodeFlag {
// Copy returns a copy of Trie.
func (t *Trie) Copy() *Trie {
return &Trie{
- root: copyNode(t.root),
- owner: t.owner,
- committed: t.committed,
- unhashed: t.unhashed,
- uncommitted: t.uncommitted,
- reader: t.reader,
- opTracer: t.opTracer.copy(),
- prevalueTracer: t.prevalueTracer.Copy(),
+ root: copyNode(t.root),
+ owner: t.owner,
+ committed: t.committed,
+ unhashed: t.unhashed,
+ uncommitted: t.uncommitted,
+ reader: t.reader,
+ archiveResolver: t.archiveResolver,
+ opTracer: t.opTracer.copy(),
+ prevalueTracer: t.prevalueTracer.Copy(),
}
}
+// SetArchiveResolver sets the archive resolver callback for expired nodes.
+func (t *Trie) SetArchiveResolver(resolver archive.ResolverFn) {
+ t.archiveResolver = resolver
+}
+
// New creates the trie instance with provided trie id and the read-only
// database. The state specified by trie id must be available, otherwise
// an error will be returned. The trie root specified by trie id can be
@@ -218,6 +229,14 @@ func (t *Trie) get(origNode node, key []byte, pos int) (value []byte, newnode no
}
value, newnode, _, err := t.get(child, key, pos)
return value, newnode, true, err
+ case *expiredNode:
+ log.Debug("Resolving expired node in get()", "owner", t.owner, "offset", n.offset, "size", n.size, "pos", pos)
+ newnode, err := resolveExpiredNodeData(n)
+ if err != nil {
+ return nil, n, false, err
+ }
+ value, _, _, err = t.get(newnode, key, pos)
+ return value, newnode, true, err
default:
panic(fmt.Sprintf("%T: invalid node: %v", origNode, origNode))
}
@@ -352,6 +371,14 @@ func (t *Trie) getNode(origNode node, path []byte, pos int) (item []byte, newnod
item, newnode, resolved, err := t.getNode(child, path, pos)
return item, newnode, resolved + 1, err
+ case *expiredNode:
+ rn, err := resolveExpiredNodeData(n)
+ if err != nil {
+ return nil, n, 0, err
+ }
+ item, newnode, resolvedCount, err := t.getNode(rn, path, pos)
+ return item, newnode, resolvedCount + 1, err
+
default:
panic(fmt.Sprintf("%T: invalid node: %v", origNode, origNode))
}
@@ -475,6 +502,18 @@ func (t *Trie) insert(n node, prefix, key []byte, value node) (bool, node, error
}
return true, nn, nil
+ case *expiredNode:
+ log.Debug("Resolving expired node in insert()", "owner", t.owner, "offset", n.offset, "size", n.size)
+ rn, err := resolveExpiredNodeData(n)
+ if err != nil {
+ return false, nil, err
+ }
+ dirty, nn, err := t.insert(rn, prefix, key, value)
+ if !dirty || err != nil {
+ return false, rn, err
+ }
+ return true, nn, nil
+
default:
panic(fmt.Sprintf("%T: invalid node: %v", n, n))
}
@@ -636,6 +675,18 @@ func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) {
}
return true, nn, nil
+ case *expiredNode:
+ log.Debug("Resolving expired node in delete()", "owner", t.owner, "offset", n.offset, "size", n.size)
+ rn, err := resolveExpiredNodeData(n)
+ if err != nil {
+ return false, nil, err
+ }
+ dirty, nn, err := t.delete(rn, prefix, key)
+ if !dirty || err != nil {
+ return false, rn, err
+ }
+ return true, nn, nil
+
default:
panic(fmt.Sprintf("%T: invalid node: %v (%v)", n, n, key))
}
@@ -666,14 +717,24 @@ func copyNode(n node) node {
}
case hashNode:
return n
+ case *expiredNode:
+ return &expiredNode{
+ offset: n.offset,
+ size: n.size,
+ cachedHash: common.CopyBytes(n.cachedHash),
+ archiveResolver: n.archiveResolver,
+ }
default:
panic(fmt.Sprintf("%T: unknown node type", n))
}
}
func (t *Trie) resolve(n node, prefix []byte) (node, error) {
- if n, ok := n.(hashNode); ok {
+ switch n := n.(type) {
+ case hashNode:
return t.resolveAndTrack(n, prefix)
+ case *expiredNode:
+ return resolveExpiredNodeData(n)
}
return n, nil
}
@@ -784,6 +845,58 @@ func (t *Trie) Witness() map[string][]byte {
return t.prevalueTracer.Values()
}
+// WalkStats holds statistics from a Walk traversal.
+type WalkStats struct {
+ Leaves int // Number of leaf nodes visited
+ ExpiredResolved int // Number of expired nodes resolved from archive
+}
+
+// Walk recursively traverses the trie, resolving all nodes including
+// hashNodes and expiredNodes. It calls fn for each leaf found.
+// This triggers hash verification for expired nodes via cachedHash.
+func (t *Trie) Walk(fn func(path []byte, value []byte) error) (WalkStats, error) {
+ return t.walk(t.root, nil, fn)
+}
+
+func (t *Trie) walk(n node, path []byte, fn func([]byte, []byte) error) (WalkStats, error) {
+ switch n := n.(type) {
+ case *shortNode:
+ return t.walk(n.Val, append(append([]byte{}, path...), n.Key...), fn)
+ case *fullNode:
+ var stats WalkStats
+ for i, child := range n.Children[:16] {
+ if child != nil {
+ childStats, err := t.walk(child, append(append([]byte{}, path...), byte(i)), fn)
+ if err != nil {
+ return stats, err
+ }
+ stats.Leaves += childStats.Leaves
+ stats.ExpiredResolved += childStats.ExpiredResolved
+ }
+ }
+ return stats, nil
+ case hashNode:
+ resolved, err := t.resolveAndTrack(n, path)
+ if err != nil {
+ return WalkStats{}, err
+ }
+ return t.walk(resolved, path, fn)
+ case *expiredNode:
+ resolved, err := resolveExpiredNodeData(n)
+ if err != nil {
+ return WalkStats{}, err
+ }
+ childStats, err := t.walk(resolved, path, fn)
+ childStats.ExpiredResolved++
+ return childStats, err
+ case valueNode:
+ return WalkStats{Leaves: 1}, fn(path, []byte(n))
+ case nil:
+ return WalkStats{}, nil
+ }
+ return WalkStats{}, nil
+}
+
// reset drops the referenced root node and cleans all internal state.
func (t *Trie) reset() {
t.root = nil
diff --git a/triedb/database.go b/triedb/database.go
index ef95169df1..71b578367b 100644
--- a/triedb/database.go
+++ b/triedb/database.go
@@ -399,6 +399,28 @@ func (db *Database) Disk() ethdb.Database {
return db.disk
}
+// DiffHead returns the root hash of the topmost diff layer in pathdb.
+// If there are no diff layers or the backend is not pathdb, it returns
+// the zero hash and false.
+func (db *Database) DiffHead() (common.Hash, bool) {
+ pdb, ok := db.backend.(*pathdb.Database)
+ if !ok {
+ return common.Hash{}, false
+ }
+ return pdb.DiffHead()
+}
+
+// DisableStateHistory closes and disables the state history freezer.
+// This is used by the archiver to bypass state history writes during
+// diff layer flushing when state history may have gaps.
+func (db *Database) DisableStateHistory() {
+ pdb, ok := db.backend.(*pathdb.Database)
+ if !ok {
+ return
+ }
+ pdb.DisableStateHistory()
+}
+
// SnapshotCompleted returns the indicator if the snapshot is completed.
func (db *Database) SnapshotCompleted() bool {
pdb, ok := db.backend.(*pathdb.Database)
diff --git a/triedb/pathdb/database.go b/triedb/pathdb/database.go
index e52949c93e..ba606552df 100644
--- a/triedb/pathdb/database.go
+++ b/triedb/pathdb/database.go
@@ -318,6 +318,30 @@ func (db *Database) Update(root common.Hash, parentRoot common.Hash, block uint6
return db.tree.cap(root, maxDiffLayers)
}
+// DiffHead returns the root hash of the topmost diff layer. If there are no
+// diff layers (only the disk layer), it returns the disk layer root and false.
+func (db *Database) DiffHead() (common.Hash, bool) {
+ db.lock.RLock()
+ defer db.lock.RUnlock()
+
+ return db.tree.diffHead()
+}
+
+// DisableStateHistory closes and disables the state history freezer. This is
+// used by the archiver to bypass state history writes during diff layer flushing,
+// since the archiver only needs trie nodes committed to disk and state history
+// may have gaps from unclean shutdowns that prevent sequential appends.
+func (db *Database) DisableStateHistory() {
+ db.lock.Lock()
+ defer db.lock.Unlock()
+
+ if db.stateFreezer != nil {
+ db.stateFreezer.Close()
+ db.stateFreezer = nil
+ log.Info("Disabled state history freezer")
+ }
+}
+
// Commit traverses downwards the layer tree from a specified layer with the
// provided state root and all the layers below are flattened downwards. It
// can be used alone and mostly for test purposes.
diff --git a/triedb/pathdb/history.go b/triedb/pathdb/history.go
index 55ec29e4f0..4730802d9c 100644
--- a/triedb/pathdb/history.go
+++ b/triedb/pathdb/history.go
@@ -278,9 +278,17 @@ func truncateFromHead(store ethdb.AncientStore, typ historyType, nhead uint64) (
return 0, err
}
// Ensure that the truncation target falls within the valid range.
- if ohead < nhead || nhead < otail {
+ if nhead < otail {
return 0, fmt.Errorf("%w, %s, tail: %d, head: %d, target: %d", errHeadTruncationOutOfRange, typ, otail, ohead, nhead)
}
+ // If the target is ahead of the current head, there's nothing to truncate.
+ // This can happen after unclean shutdowns where the state history was not
+ // fully written.
+ if ohead < nhead {
+ log.Warn("State history shorter than target, nothing to truncate",
+ "type", typ.String(), "head", ohead, "target", nhead)
+ return 0, nil
+ }
// Short circuit if nothing to truncate.
if ohead == nhead {
return 0, nil
diff --git a/triedb/pathdb/history_state_test.go b/triedb/pathdb/history_state_test.go
index 5c3026a571..3e75d3baa8 100644
--- a/triedb/pathdb/history_state_test.go
+++ b/triedb/pathdb/history_state_test.go
@@ -244,8 +244,8 @@ func TestTruncateOutOfRange(t *testing.T) {
target uint64
expErr error
}{
- {0, head, nil}, // nothing to delete
- {0, head + 1, errHeadTruncationOutOfRange},
+ {0, head, nil}, // nothing to delete
+ {0, head + 1, nil}, // gracefully handled after unclean shutdown
{0, tail - 1, errHeadTruncationOutOfRange},
{1, tail, nil}, // nothing to delete
{1, head + 1, errTailTruncationOutOfRange},
diff --git a/triedb/pathdb/layertree.go b/triedb/pathdb/layertree.go
index b20e40bd05..99fd23a2a1 100644
--- a/triedb/pathdb/layertree.go
+++ b/triedb/pathdb/layertree.go
@@ -31,6 +31,7 @@ import (
// of the referenced layer by themselves.
type layerTree struct {
base *diskLayer
+ head common.Hash // Root hash of the topmost layer (diff or disk)
layers map[common.Hash]layer
// descendants is a two-dimensional map where the keys represent
@@ -59,6 +60,7 @@ func (tree *layerTree) init(head layer) {
defer tree.lock.Unlock()
current := head
+ tree.head = head.rootHash()
tree.layers = make(map[common.Hash]layer)
tree.descendants = make(map[common.Hash]map[common.Hash]struct{})
@@ -76,6 +78,18 @@ func (tree *layerTree) init(head layer) {
tree.lookup = newLookup(head, tree.isDescendant)
}
+// diffHead returns the root hash of the topmost diff layer. If there are no
+// diff layers, returns the disk layer root and false.
+func (tree *layerTree) diffHead() (common.Hash, bool) {
+ tree.lock.RLock()
+ defer tree.lock.RUnlock()
+
+ if _, ok := tree.layers[tree.head].(*diffLayer); ok {
+ return tree.head, true
+ }
+ return tree.base.rootHash(), false
+}
+
// get retrieves a layer belonging to the given state root.
func (tree *layerTree) get(root common.Hash) layer {
tree.lock.RLock()
diff --git a/triedb/pathdb/reader.go b/triedb/pathdb/reader.go
index e087ef26ed..845667b578 100644
--- a/triedb/pathdb/reader.go
+++ b/triedb/pathdb/reader.go
@@ -69,7 +69,7 @@ func (r *reader) Node(owner common.Hash, path []byte, hash common.Hash) ([]byte,
return nil, err
}
// Error out if the local one is inconsistent with the target.
- if !r.noHashCheck && got != hash {
+ if !r.noHashCheck && (len(blob) > 0 && blob[0] != 0) && got != hash {
// Location is always available even if the node
// is not found.
switch loc.loc {