trie, cmd/geth: add archiver command

This commit is contained in:
Guillaume Ballet 2026-01-25 09:05:07 +01:00
parent d087178f8c
commit d82e83d2d9
4 changed files with 698 additions and 0 deletions

201
cmd/geth/archivecmd.go Normal file
View file

@ -0,0 +1,201 @@
// Copyright 2026 The go-ethereum Authors
// This file is part of go-ethereum.
//
// go-ethereum is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// go-ethereum is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with go-ethereum. If not, see <http://www.gnu.org/licenses/>.
package main
import (
"errors"
"fmt"
"path/filepath"
"slices"
"time"
"github.com/ethereum/go-ethereum/cmd/utils"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/trie"
"github.com/ethereum/go-ethereum/trie/archive"
"github.com/urfave/cli/v2"
)
var (
	// archiveOutputFlag selects the file that archived subtree records are
	// appended to. When left empty, archiveGenerate falls back to a file named
	// "nodearchive" inside the node's resolved data directory.
	archiveOutputFlag = &cli.StringFlag{
		Name:  "output",
		Usage: "Path to archive output file",
		Value: "",
	}
	// archiveCompactionIntervalFlag controls how many archived subtrees may
	// accumulate between two intermediate database compactions.
	archiveCompactionIntervalFlag = &cli.Uint64Flag{
		Name:  "compaction-interval",
		Usage: "Run compaction after this many subtrees (0 = disable)",
		Value: 1000,
	}
	// archiveDryRunFlag makes the command report what it would archive
	// without opening the archive file or writing to the database.
	archiveDryRunFlag = &cli.BoolFlag{
		Name:  "dry-run",
		Usage: "Simulate without modifying database",
	}

	// archiveCommand is the top-level "archive" command; it only groups the
	// subcommands listed below.
	archiveCommand = &cli.Command{
		Name:  "archive",
		Usage: "Archive state trie nodes to reduce database size",
		Subcommands: []*cli.Command{
			archiveGenerateCmd,
		},
	}
	// archiveGenerateCmd is the "archive generate" subcommand, implemented by
	// archiveGenerate.
	//
	// The examples in the description place every flag before the optional
	// state root: flag parsing stops at the first positional argument, so a
	// flag written after the root would be treated as an extra argument
	// instead of being applied.
	archiveGenerateCmd = &cli.Command{
		Name:      "generate",
		Usage:     "Generate archive files from height-3 subtrees",
		ArgsUsage: "[state-root]",
		Action:    archiveGenerate,
		Flags: slices.Concat([]cli.Flag{
			archiveOutputFlag,
			archiveCompactionIntervalFlag,
			archiveDryRunFlag,
		}, utils.NetworkFlags, utils.DatabaseFlags),
		Description: `
Walks the state trie of the specified root (or head block) and archives
subtrees at height 3. Each archived subtree is replaced with an expiredNode
that references the archive file offset and size.
Height is measured from leaves: leaves=0, parents=1, etc. A height-3 node
has leaves at most 3 levels below it.
Examples:
# Archive from head state
geth archive generate --datadir /path/to/datadir
# Dry run to see what would be archived
geth archive generate --dry-run --datadir /path/to/datadir
# Archive from a specific state root (flags must come before the root)
geth archive generate --datadir /path/to/datadir 0x1234...abcd
# Custom output and compaction interval
geth archive generate --output /path/to/archive --compaction-interval 500
`,
	}
)
// archiveGenerate implements "geth archive generate".
//
// It opens the chain database, checks that the path-based state scheme is in
// use, picks the state root (the optional positional argument, otherwise the
// head block's root), and runs a trie.Archiver over that state. Unless
// --dry-run is given, archived subtrees are appended to the archive file and a
// final database compaction is attempted once at least one subtree was
// archived.
//
// An error is returned when the arguments are malformed, the state scheme is
// not path-based, no head block or state trie is found, the archive file
// cannot be opened, or the archiver itself fails.
func archiveGenerate(ctx *cli.Context) error {
	// Validate the positional arguments before touching any database. Flag
	// parsing stops at the first positional argument, so a flag written after
	// the state root shows up here as an extra argument; rejecting it avoids
	// silently running against an unintended data directory.
	if ctx.NArg() > 1 {
		return fmt.Errorf("expected at most one state root argument, got %d (flags must be placed before the state root)", ctx.NArg())
	}
	var (
		root    common.Hash
		hasRoot = ctx.NArg() == 1
	)
	if hasRoot {
		// UnmarshalText rejects malformed input, whereas HexToHash would
		// quietly turn it into some other hash.
		arg := ctx.Args().First()
		if err := root.UnmarshalText([]byte(arg)); err != nil {
			return fmt.Errorf("invalid state root %q: %w", arg, err)
		}
	}

	// 1. Setup node and databases
	stack, _ := makeConfigNode(ctx)
	defer stack.Close()

	// The chain database is opened read-only exactly when this is a dry run.
	dryRun := ctx.Bool(archiveDryRunFlag.Name)
	chaindb := utils.MakeChainDatabase(ctx, stack, dryRun)
	defer chaindb.Close()

	// Check state scheme - we only support PathDB
	scheme := cycleCheckScheme(ctx, chaindb)
	if scheme != rawdb.PathScheme {
		return fmt.Errorf("archive generation requires path-based state scheme, got: %s", scheme)
	}
	// The archiver only reads through the trie database, so it is opened
	// read-only for dry runs to match the read-only chain database.
	triedb := utils.MakeTrieDatabase(ctx, stack, chaindb, false, dryRun, false)
	defer triedb.Close()

	// 2. Determine state root
	if hasRoot {
		log.Info("Using specified state root", "root", root)
	} else {
		headBlock := rawdb.ReadHeadBlock(chaindb)
		if headBlock == nil {
			return errors.New("no head block found - specify a state root or sync the chain first")
		}
		root = headBlock.Root()
		log.Info("Using head block state", "number", headBlock.NumberU64(), "root", root)
	}
	// Verify that an account trie root node is present at all.
	if !rawdb.HasAccountTrieNode(chaindb, nil) {
		return errors.New("state trie not found in database")
	}

	// 3. Open archive writer (unless dry-run)
	var writer *archive.ArchiveWriter
	archivePath := ctx.String(archiveOutputFlag.Name)
	if archivePath == "" {
		archivePath = filepath.Join(stack.ResolvePath(""), "nodearchive")
	}
	if !dryRun {
		var err error
		writer, err = archive.NewArchiveWriter(archivePath)
		if err != nil {
			return fmt.Errorf("failed to open archive file %s: %w", archivePath, err)
		}
		defer func() {
			// A failed close can mean buffered archive data was lost, so it
			// is surfaced instead of being dropped.
			if err := writer.Close(); err != nil {
				log.Warn("Failed to close archive file", "path", archivePath, "err", err)
			}
		}()
		log.Info("Opened archive file", "path", archivePath)
	} else {
		log.Info("Dry run mode - no changes will be made")
	}

	// 4. Create and run archiver
	archiver := trie.NewArchiver(
		chaindb,
		triedb,
		writer,
		ctx.Uint64(archiveCompactionIntervalFlag.Name),
		dryRun,
	)
	start := time.Now()
	if err := archiver.ProcessState(root); err != nil {
		return fmt.Errorf("archive generation failed: %w", err)
	}

	// 5. Get stats and optionally run final compaction
	subtrees, leaves, bytesDeleted := archiver.Stats()
	if !dryRun && subtrees > 0 {
		log.Info("Running final database compaction")
		// Compaction is best effort: the archival itself already succeeded.
		if err := chaindb.Compact(nil, nil); err != nil {
			log.Warn("Final compaction failed", "err", err)
		}
	}

	// 6. Print summary. The archive size is the writer's current offset,
	// which starts at the size the file had when it was opened.
	var archiveSize uint64
	if writer != nil {
		archiveSize = writer.Offset()
	}
	log.Info("Archive generation complete",
		"subtrees", subtrees,
		"leaves", leaves,
		"bytesDeleted", bytesDeleted,
		"archiveSize", archiveSize,
		"elapsed", common.PrettyDuration(time.Since(start)))
	if dryRun {
		log.Info("This was a dry run - no changes were made to the database")
	}
	return nil
}
// cycleCheckScheme reports which state scheme the given database uses, as
// read by rawdb.ReadStateScheme. The CLI context is accepted but not consulted.
func cycleCheckScheme(ctx *cli.Context, db ethdb.Database) string {
	scheme := rawdb.ReadStateScheme(db)
	return scheme
}

View file

@ -239,6 +239,8 @@ func init() {
dumpConfigCommand,
// see dbcmd.go
dbCommand,
// See archivecmd.go
archiveCommand,
// See cmd/utils/flags_legacy.go
utils.ShowDeprecated,
// See snapshot.go

92
trie/archive/writer.go Normal file
View file

@ -0,0 +1,92 @@
// Copyright 2026 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package archive
import (
"os"
"sync"
"github.com/ethereum/go-ethereum/rlp"
)
// ArchiveWriter is an append-only writer for archive files.
// It writes RLP-encoded records to a file and tracks the current offset,
// so callers can later locate what they wrote by offset and size.
type ArchiveWriter struct {
file *os.File // Underlying archive file, opened in append mode by NewArchiveWriter
offset uint64 // Position where the next write will land; starts at the file size on open
mu sync.Mutex // Held by WriteSubtree and Offset while they touch offset
}
// NewArchiveWriter opens the archive file at path for appending, creating it
// (mode 0644) when it does not exist yet. The returned writer's offset starts
// at the file's current size, so writes to an existing archive continue from
// its end. Any error from opening or stat-ing the file is returned as is; on
// a stat failure the file is closed again first.
func NewArchiveWriter(path string) (*ArchiveWriter, error) {
	const openFlags = os.O_WRONLY | os.O_CREATE | os.O_APPEND

	f, openErr := os.OpenFile(path, openFlags, 0644)
	if openErr != nil {
		return nil, openErr
	}
	stat, statErr := f.Stat()
	if statErr != nil {
		f.Close()
		return nil, statErr
	}
	w := &ArchiveWriter{
		file:   f,
		offset: uint64(stat.Size()),
	}
	return w, nil
}
// WriteSubtree writes all records belonging to a subtree and returns the
// starting offset and total size of the written data. This is the unit of
// archival: the records of one subtree are stored contiguously and can be
// retrieved together using the returned offset and size.
//
// All records are RLP-encoded before anything is written, so an encoding
// failure leaves the file untouched. The encoded records are then handed to
// the file in a single write. If that write fails after some bytes were
// already appended, the tracked offset is still advanced by the number of
// bytes written, so that offsets returned by later calls keep matching the
// real end of the append-only file.
//
// On error the returned offset and size are zero. An empty records slice
// writes nothing and returns the current offset with size zero.
func (w *ArchiveWriter) WriteSubtree(records []*Record) (offset uint64, size uint64, err error) {
	var payload []byte
	for _, rec := range records {
		encoded, encErr := rlp.EncodeToBytes(rec)
		if encErr != nil {
			return 0, 0, encErr
		}
		payload = append(payload, encoded...)
	}

	w.mu.Lock()
	defer w.mu.Unlock()

	start := w.offset
	if len(payload) == 0 {
		return start, 0, nil
	}
	written, writeErr := w.file.Write(payload)
	// Account for partially written data even on failure: the bytes are in
	// the file and the next append will land after them.
	w.offset += uint64(written)
	if writeErr != nil {
		return 0, 0, writeErr
	}
	return start, w.offset - start, nil
}
// Sync asks the underlying file to flush its contents to disk and returns the
// file's error, if any. Callers are expected to sync after writing a subtree
// and before touching the database, so the archived data is durable first.
func (w *ArchiveWriter) Sync() error {
	err := w.file.Sync()
	return err
}
// Close closes the underlying archive file and returns the file's close error.
func (w *ArchiveWriter) Close() error {
	err := w.file.Close()
	return err
}
// Offset reports the position at which the next write will land. The value is
// read while holding the writer's mutex.
func (w *ArchiveWriter) Offset() uint64 {
	w.mu.Lock()
	current := w.offset
	w.mu.Unlock()
	return current
}

403
trie/archiver.go Normal file
View file

@ -0,0 +1,403 @@
// Copyright 2026 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package trie
import (
"encoding/binary"
"fmt"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/trie/archive"
"github.com/ethereum/go-ethereum/triedb/database"
)
// subtreeInfo holds information about a subtree to be archived.
// It contains all the data needed to write the subtree to an archive
// and replace it with an expiredNode in the database.
type subtreeInfo struct {
path []byte // Path from the trie root to the subtree root (branch indices and short-node keys)
owner common.Hash // Zero for account trie, account hash for storage
height int // Height of subtree (from leaves, which have height 0)
leaves []*archive.Record // All leaf records (path relative to the subtree root + leaf value)
nodePaths [][]byte // Paths of all nodes in the subtree; the first entry is the subtree root itself
}
// Archiver handles the archival process of trie nodes.
// It walks the state trie, identifies subtrees at height 3,
// archives their leaf data, and replaces them with expiredNode markers.
type Archiver struct {
db ethdb.Database // Key-value database that node deletions, expired markers and compactions are applied to
triedb database.NodeDatabase // Source of node readers used to resolve hash nodes
writer *archive.ArchiveWriter // Archive file writer; may be nil for a dry run
compactionInterval uint64 // Number of archived subtrees between compactions (0 = disable)
dryRun bool // If true, subtrees are only reported, not archived
stateRoot common.Hash // State root being processed; set by ProcessState and used when resolving nodes
// Progress tracking
subtreesArchived uint64 // Subtrees for which archiveSubtree returned without error
bytesDeleted uint64 // NOTE(review): accumulates the length of each deleted node path, not the size of the stored node
leavesArchived uint64 // Leaf records contained in the archived subtrees
lastCompaction uint64 // Value of subtreesArchived at the last compaction
}
// NewArchiver builds an Archiver from its collaborators. All progress
// counters start at zero.
//
// Parameters:
//   - db: the underlying key-value database
//   - triedb: the trie database used for reading nodes
//   - writer: archive file writer (may be nil for a dry run)
//   - compactionInterval: run compaction after this many subtrees (0 = disable)
//   - dryRun: if true, the database is not modified
func NewArchiver(db ethdb.Database, triedb database.NodeDatabase,
	writer *archive.ArchiveWriter, compactionInterval uint64, dryRun bool) *Archiver {
	arch := new(Archiver)
	arch.db = db
	arch.triedb = triedb
	arch.writer = writer
	arch.compactionInterval = compactionInterval
	arch.dryRun = dryRun
	return arch
}
// ProcessState archives subtrees from the given state root.
//
// Storage tries are processed first and the account trie last. Discovering
// the storage tries requires iterating over every account leaf, and archiving
// the account trie deletes account trie nodes and replaces subtree roots with
// expired markers in the database. Iterating after that step could therefore
// fail or miss accounts, so the account trie is only archived once the
// iteration has completed.
//
// Problems with an individual account (undecodable account data, a storage
// trie that cannot be opened or processed) are logged and skipped. Failing to
// open or iterate the account trie, or to process it, aborts with an error.
func (a *Archiver) ProcessState(root common.Hash) error {
	a.stateRoot = root

	accountTrie, err := New(StateTrieID(root), a.triedb)
	if err != nil {
		return fmt.Errorf("failed to open account trie: %w", err)
	}

	// Process storage tries for accounts with storage while the account trie
	// is still fully readable.
	log.Info("Processing storage tries")
	iter, err := accountTrie.NodeIterator(nil)
	if err != nil {
		return fmt.Errorf("failed to create account iterator: %w", err)
	}
	kvIter := NewIterator(iter)
	for kvIter.Next() {
		// Decode the account to check for storage
		var acc types.StateAccount
		if err := rlp.DecodeBytes(kvIter.Value, &acc); err != nil {
			log.Warn("Failed to decode account", "err", err)
			continue
		}
		if acc.Root == types.EmptyRootHash {
			continue
		}
		// Process this account's storage trie
		accountHash := common.BytesToHash(kvIter.Key)
		storageID := StorageTrieID(root, accountHash, acc.Root)
		storageTrie, err := New(storageID, a.triedb)
		if err != nil {
			log.Warn("Failed to open storage trie", "account", accountHash, "err", err)
			continue
		}
		if err := a.processTrie(accountHash, storageTrie); err != nil {
			log.Warn("Failed to process storage trie", "account", accountHash, "err", err)
		}
	}
	if kvIter.Err != nil {
		return fmt.Errorf("account iteration error: %w", kvIter.Err)
	}

	// Process account trie (owner = zero hash) only after every account has
	// been visited.
	log.Info("Processing account trie", "root", root)
	if err := a.processTrie(common.Hash{}, accountTrie); err != nil {
		return fmt.Errorf("failed to process account trie: %w", err)
	}
	return nil
}
// processTrie collects every height-3 subtree of t and archives them one by
// one. A subtree that fails to archive is logged and skipped; a failed
// compaction is logged as well. The method itself always returns nil.
func (a *Archiver) processTrie(owner common.Hash, t *Trie) error {
	// Nothing to do for a trie without a root node.
	if t.root == nil {
		return nil
	}
	candidates := a.findHeight3Subtrees(t.root, nil, owner)
	log.Info("Found subtrees to archive", "owner", owner, "count", len(candidates))

	for idx := range candidates {
		subtree := candidates[idx]
		archiveErr := a.archiveSubtree(subtree)
		if archiveErr != nil {
			log.Warn("Failed to archive subtree", "path", common.Bytes2Hex(subtree.path), "err", archiveErr)
			continue
		}
		// Only successfully archived subtrees count towards the statistics.
		a.subtreesArchived++
		a.leavesArchived += uint64(len(subtree.leaves))

		compactErr := a.maybeCompact()
		if compactErr != nil {
			log.Warn("Compaction failed", "err", compactErr)
		}
	}
	return nil
}
// findHeight3Subtrees returns every subtree below (and including) n whose
// height is exactly 3, where leaves have height 0, their parents 1, and so on.
//
// The subtree rooted at n is measured first. A height of exactly 3 yields n
// itself; a smaller height yields nothing; a larger height makes the search
// descend into n's children (resolving n first when it is a hash node; a
// failed resolution yields nothing for that branch).
func (a *Archiver) findHeight3Subtrees(n node, path []byte, owner common.Hash) []*subtreeInfo {
	info := a.computeSubtreeInfo(n, path, owner)
	switch {
	case info == nil:
		return nil
	case info.height == 3:
		// Exactly the archival height: this subtree is a candidate.
		return []*subtreeInfo{info}
	case info.height < 3:
		// Too shallow: no archivable subtrees here.
		return nil
	}

	// Taller than 3: look for candidates further down.
	var found []*subtreeInfo
	switch cur := n.(type) {
	case *fullNode:
		for idx := 0; idx < 16; idx++ {
			child := cur.Children[idx]
			if child == nil {
				continue
			}
			childPath := make([]byte, 0, len(path)+1)
			childPath = append(childPath, path...)
			childPath = append(childPath, byte(idx))
			found = append(found, a.findHeight3Subtrees(child, childPath, owner)...)
		}
	case *shortNode:
		childPath := make([]byte, 0, len(path)+len(cur.Key))
		childPath = append(childPath, path...)
		childPath = append(childPath, cur.Key...)
		found = append(found, a.findHeight3Subtrees(cur.Val, childPath, owner)...)
	case hashNode:
		// Resolve and continue at the same path.
		if resolved, err := a.resolveNode(cur, path, owner); err == nil {
			found = append(found, a.findHeight3Subtrees(resolved, path, owner)...)
		}
	}
	return found
}
// computeSubtreeInfo computes the height of the subtree rooted at n and
// collects its leaves and node paths.
//
// Leaf paths are relative to n: every short-node key and branch index passed
// on the way back up is prepended to the paths collected below it.
//
// Nodes whose content cannot be inspected - an expiredNode left behind by an
// earlier archival, or a hash node that fails to resolve - are reported as
// "opaque": an info without leaves or node paths whose height lies above the
// archival height of 3. Every ancestor of such a node consequently also
// measures above 3 and is never archived as a unit. Treating these nodes as
// absent instead would allow an ancestor to measure exactly 3 and be replaced
// by a fresh expired marker whose archive records do not cover the opaque
// child, losing that child's data or archive reference.
//
// nil is returned only when there is nothing at all below n: n is nil, n is
// of an unknown type, or no child produced any info.
func (a *Archiver) computeSubtreeInfo(n node, path []byte, owner common.Hash) *subtreeInfo {
	// Any value above the archival height (3) works here; the exact number is
	// never reported for an archived subtree.
	const opaqueHeight = 4
	opaque := func() *subtreeInfo {
		return &subtreeInfo{
			path:   copyBytes(path),
			owner:  owner,
			height: opaqueHeight,
		}
	}

	switch n := n.(type) {
	case nil:
		return nil

	case valueNode:
		// Leaf: height 0. The raw value is stored with an empty relative
		// path; the callers up the stack prepend their path segments.
		return &subtreeInfo{
			path:   copyBytes(path),
			owner:  owner,
			height: 0,
			leaves: []*archive.Record{{
				Path:  nil,
				Value: []byte(n),
			}},
			nodePaths: [][]byte{copyBytes(path)},
		}

	case *shortNode:
		childPath := append(append([]byte{}, path...), n.Key...)
		childInfo := a.computeSubtreeInfo(n.Val, childPath, owner)
		if childInfo == nil {
			return nil
		}
		// Adjust relative paths in leaves to include this node's key
		for _, leaf := range childInfo.leaves {
			leaf.Path = append(append([]byte{}, n.Key...), leaf.Path...)
		}
		return &subtreeInfo{
			path:      copyBytes(path),
			owner:     owner,
			height:    childInfo.height + 1,
			leaves:    childInfo.leaves,
			nodePaths: append([][]byte{copyBytes(path)}, childInfo.nodePaths...),
		}

	case *fullNode:
		var (
			maxHeight   int
			hasChildren bool
			allLeaves   []*archive.Record
			allPaths    = [][]byte{copyBytes(path)}
		)
		for i, child := range n.Children[:16] {
			if child == nil {
				continue
			}
			childPath := append(append([]byte{}, path...), byte(i))
			childInfo := a.computeSubtreeInfo(child, childPath, owner)
			if childInfo == nil {
				continue
			}
			hasChildren = true
			maxHeight = max(maxHeight, childInfo.height+1)
			// Adjust relative paths to include the branch index
			for _, leaf := range childInfo.leaves {
				leaf.Path = append([]byte{byte(i)}, leaf.Path...)
			}
			allLeaves = append(allLeaves, childInfo.leaves...)
			allPaths = append(allPaths, childInfo.nodePaths...)
		}
		// A branch made up solely of opaque children has no leaves but must
		// still report its height, so only a branch without any child info
		// counts as empty.
		if !hasChildren {
			return nil
		}
		return &subtreeInfo{
			path:      copyBytes(path),
			owner:     owner,
			height:    maxHeight,
			leaves:    allLeaves,
			nodePaths: allPaths,
		}

	case hashNode:
		resolved, err := a.resolveNode(n, path, owner)
		if err != nil {
			log.Debug("Failed to resolve hashNode", "path", common.Bytes2Hex(path), "err", err)
			return opaque()
		}
		return a.computeSubtreeInfo(resolved, path, owner)

	case *expiredNode:
		// Already archived: its content lives in the archive, not here.
		return opaque()
	}
	return nil
}
// archiveSubtree writes the subtree's leaves to the archive and replaces the
// subtree in the database with an expired marker.
//
// In dry-run mode the subtree is only logged. Otherwise the steps are, in
// order: append the leaf records to the archive file, sync the file, then
// apply one database batch that deletes every node below the subtree root and
// overwrites the root with a blob holding the archive offset and size. The
// sync happens before the batch so the database never points at archive data
// that is not yet on disk.
//
// An error is returned, and the database left untouched, when no archive
// writer is configured, when the subtree has no leaves or node paths, or when
// writing/syncing the archive fails. A failed batch write is returned as an
// error as well.
//
// NOTE(review): bytesDeleted is increased by the length of each deleted node
// path, not by the size of the stored node - confirm this is the intended
// metric.
func (a *Archiver) archiveSubtree(info *subtreeInfo) error {
	if a.dryRun {
		log.Info("Would archive subtree",
			"path", common.Bytes2Hex(info.path),
			"owner", info.owner,
			"height", info.height,
			"leaves", len(info.leaves),
			"nodes", len(info.nodePaths))
		return nil
	}
	// The writer is allowed to be nil for dry runs only; fail cleanly rather
	// than dereferencing nil.
	if a.writer == nil {
		return fmt.Errorf("archive writer is not configured")
	}
	// Without leaves the marker would reference an empty archive region, and
	// without node paths the root-skipping slice below would panic.
	if len(info.leaves) == 0 || len(info.nodePaths) == 0 {
		return fmt.Errorf("refusing to archive subtree with %d leaves and %d node paths",
			len(info.leaves), len(info.nodePaths))
	}

	// 1. Write to archive file
	offset, size, err := a.writer.WriteSubtree(info.leaves)
	if err != nil {
		return fmt.Errorf("failed to write subtree to archive: %w", err)
	}
	// 2. Sync to ensure durability before modifying DB
	if err := a.writer.Sync(); err != nil {
		return fmt.Errorf("failed to sync archive: %w", err)
	}

	// 3. Batch database operations
	var (
		batch     = a.db.NewBatch()
		isAccount = info.owner == (common.Hash{})
	)
	// Delete all nodes in the subtree except the root (first entry), which is
	// overwritten below.
	for _, nodePath := range info.nodePaths[1:] {
		if isAccount {
			rawdb.DeleteAccountTrieNode(batch, nodePath)
		} else {
			rawdb.DeleteStorageTrieNode(batch, info.owner, nodePath)
		}
		a.bytesDeleted += uint64(len(nodePath))
	}
	// Write expiredNode at subtree root
	expiredBlob := encodeExpiredNodeBlob(offset, size)
	if isAccount {
		rawdb.WriteAccountTrieNode(batch, info.path, expiredBlob)
	} else {
		rawdb.WriteStorageTrieNode(batch, info.owner, info.path, expiredBlob)
	}
	if err := batch.Write(); err != nil {
		return fmt.Errorf("failed to write batch: %w", err)
	}
	log.Debug("Archived subtree",
		"path", common.Bytes2Hex(info.path),
		"owner", info.owner,
		"leaves", len(info.leaves),
		"offset", offset,
		"size", size)
	return nil
}
// maybeCompact runs a full database compaction once at least
// compactionInterval subtrees have been archived since the previous one.
//
// Nothing happens when the interval is zero or when the archiver is in
// dry-run mode: a dry run must not modify the database, yet its subtree
// counter still advances.
//
// The compaction point is recorded before compacting, so a failing compaction
// is retried only after another full interval rather than after every
// subsequent subtree. The compaction error, if any, is returned.
func (a *Archiver) maybeCompact() error {
	if a.compactionInterval == 0 || a.dryRun {
		return nil
	}
	if a.subtreesArchived-a.lastCompaction < a.compactionInterval {
		return nil
	}
	log.Info("Running database compaction", "subtrees", a.subtreesArchived)
	a.lastCompaction = a.subtreesArchived
	return a.db.Compact(nil, nil)
}
// resolveNode loads and decodes the node that the given hash node refers to.
// The node is looked up by owner, path and hash through a reader obtained for
// the state root currently being processed. Errors from obtaining the reader,
// reading the node, or decoding it are returned unchanged.
func (a *Archiver) resolveNode(hash hashNode, path []byte, owner common.Hash) (node, error) {
	reader, readerErr := a.triedb.NodeReader(a.stateRoot)
	if readerErr != nil {
		return nil, readerErr
	}
	expected := common.BytesToHash(hash)
	blob, readErr := reader.Node(owner, path, expected)
	if readErr != nil {
		return nil, readErr
	}
	return decodeNodeUnsafe(hash, blob)
}
// encodeExpiredNodeBlob builds the raw database value for an expiredNode.
// Layout: the expired-node marker byte, followed by the archive offset and
// then the archived size, each written as a big-endian uint64 into a field of
// archive.OffsetSize bytes.
func encodeExpiredNodeBlob(offset, size uint64) []byte {
	const markerLen = 1

	blob := make([]byte, markerLen+2*archive.OffsetSize)
	blob[0] = expiredNodeMarker

	offsetField := blob[markerLen:]
	sizeField := blob[markerLen+archive.OffsetSize:]
	binary.BigEndian.PutUint64(offsetField, offset)
	binary.BigEndian.PutUint64(sizeField, size)
	return blob
}
// Stats reports the archiver's progress counters: the number of subtrees
// archived, the number of leaves they contained, and the bytesDeleted counter.
func (a *Archiver) Stats() (subtrees, leaves, bytesDeleted uint64) {
	subtrees = a.subtreesArchived
	leaves = a.leavesArchived
	bytesDeleted = a.bytesDeleted
	return subtrees, leaves, bytesDeleted
}
// copyBytes returns an independent copy of b. A nil input yields nil; an
// empty but non-nil input yields an empty, non-nil slice.
func copyBytes(b []byte) []byte {
	if b == nil {
		return nil
	}
	dup := make([]byte, 0, len(b))
	return append(dup, b...)
}