diff --git a/cmd/geth/archivecmd.go b/cmd/geth/archivecmd.go
new file mode 100644
index 0000000000..12712b99dd
--- /dev/null
+++ b/cmd/geth/archivecmd.go
@@ -0,0 +1,232 @@
+// Copyright 2026 The go-ethereum Authors
+// This file is part of go-ethereum.
+//
+// go-ethereum is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// go-ethereum is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with go-ethereum. If not, see <http://www.gnu.org/licenses/>.
+
+package main
+
+import (
+	"errors"
+	"fmt"
+	"path/filepath"
+	"slices"
+	"time"
+
+	"github.com/ethereum/go-ethereum/cmd/utils"
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/core/rawdb"
+	"github.com/ethereum/go-ethereum/ethdb"
+	"github.com/ethereum/go-ethereum/log"
+	"github.com/ethereum/go-ethereum/trie"
+	"github.com/ethereum/go-ethereum/trie/archive"
+	"github.com/urfave/cli/v2"
+)
+
+var (
+	// Flags for the archive command
+	archiveOutputFlag = &cli.StringFlag{
+		Name:  "output",
+		Usage: "Path to archive output file",
+		Value: "", // Empty selects the default: "nodearchive" under stack.ResolvePath("")
+	}
+	archiveCompactionIntervalFlag = &cli.Uint64Flag{
+		Name:  "compaction-interval",
+		Usage: "Run compaction after this many subtrees (0 = disable)",
+		Value: 1000,
+	}
+	archiveDryRunFlag = &cli.BoolFlag{
+		Name:  "dry-run",
+		Usage: "Simulate without modifying database",
+	}
+
+	// Commands
+	archiveCommand = &cli.Command{
+		Name:  "archive",
+		Usage: "Archive state trie nodes to reduce database size",
+		Subcommands: []*cli.Command{
+			archiveGenerateCmd,
+		},
+	}
+
+	archiveGenerateCmd = &cli.Command{
+		Name:      "generate",
+		Usage:     "Generate archive files from height-3 subtrees",
+		ArgsUsage: "[state-root]",
+		Action:    archiveGenerate,
+		Flags: slices.Concat([]cli.Flag{
+			archiveOutputFlag,
+			archiveCompactionIntervalFlag,
+			archiveDryRunFlag,
+		}, utils.NetworkFlags, utils.DatabaseFlags),
+		Description: `
+Walks the state trie of the specified root (or head block) and archives
+subtrees at height 3. Each archived subtree is replaced with an expiredNode
+that references the archive file offset and size.
+
+Height is measured from leaves: leaves=0, parents=1, etc. A height-3 node
+has leaves at most 3 levels below it.
+
+Examples:
+  # Archive from head state
+  geth archive generate --datadir /path/to/datadir
+
+  # Dry run to see what would be archived
+  geth archive generate --dry-run --datadir /path/to/datadir
+
+  # Archive from a specific state root
+  geth archive generate 0x1234...abcd --datadir /path/to/datadir
+
+  # Custom output and compaction interval
+  geth archive generate --output /path/to/archive --compaction-interval 500
+`,
+	}
+)
+
+// archiveGenerate implements "geth archive generate": it opens the node's
+// databases, picks the state root (first argument or head block) and runs the
+// trie archiver over it, logging a summary at the end.
+func archiveGenerate(ctx *cli.Context) error {
+	// 1. Setup node and databases
+	stack, _ := makeConfigNode(ctx)
+	defer stack.Close()
+
+	// Open database in write mode (readOnly=false) unless dry-run
+	dryRun := ctx.Bool(archiveDryRunFlag.Name)
+	chaindb := utils.MakeChainDatabase(ctx, stack, dryRun)
+	defer chaindb.Close()
+
+	// Check state scheme - we only support PathDB
+	scheme := cycleCheckScheme(ctx, chaindb)
+	if scheme != rawdb.PathScheme {
+		return fmt.Errorf("archive generation requires path-based state scheme, got: %s", scheme)
+	}
+
+	// A dry run must not write, so the trie database is opened read-only as
+	// well. NOTE(review): assumes the fifth argument of MakeTrieDatabase is
+	// its read-only switch - confirm against cmd/utils.
+	triedb := utils.MakeTrieDatabase(ctx, stack, chaindb, false, dryRun, false)
+	defer triedb.Close()
+
+	// 2. Determine state root
+	var root common.Hash
+	if ctx.NArg() > 0 {
+		var err error
+		if root, err = parseArchiveRoot(ctx.Args().First()); err != nil {
+			return err
+		}
+		log.Info("Using specified state root", "root", root)
+	} else {
+		headBlock := rawdb.ReadHeadBlock(chaindb)
+		if headBlock == nil {
+			return errors.New("no head block found - specify a state root or sync the chain first")
+		}
+		root = headBlock.Root()
+		log.Info("Using head block state", "number", headBlock.NumberU64(), "root", root)
+	}
+
+	// Verify the state exists. This only checks that some account trie root
+	// node is stored; it does not prove that the node belongs to root.
+	if !rawdb.HasAccountTrieNode(chaindb, nil) {
+		return errors.New("state trie not found in database")
+	}
+
+	// 3. Open archive writer (unless dry-run)
+	var writer *archive.ArchiveWriter
+	archivePath := ctx.String(archiveOutputFlag.Name)
+	if archivePath == "" {
+		archivePath = filepath.Join(stack.ResolvePath(""), "nodearchive")
+	}
+
+	if !dryRun {
+		var err error
+		writer, err = archive.NewArchiveWriter(archivePath)
+		if err != nil {
+			return fmt.Errorf("failed to open archive file %s: %w", archivePath, err)
+		}
+		// Report a failing close instead of dropping the error silently.
+		defer func() {
+			if err := writer.Close(); err != nil {
+				log.Warn("Failed to close archive file", "path", archivePath, "err", err)
+			}
+		}()
+		log.Info("Opened archive file", "path", archivePath)
+	} else {
+		log.Info("Dry run mode - no changes will be made")
+	}
+
+	// 4. Create and run archiver
+	archiver := trie.NewArchiver(
+		chaindb,
+		triedb,
+		writer,
+		ctx.Uint64(archiveCompactionIntervalFlag.Name),
+		dryRun,
+	)
+
+	start := time.Now()
+	if err := archiver.ProcessState(root); err != nil {
+		return fmt.Errorf("archive generation failed: %w", err)
+	}
+
+	// 5. Get stats and optionally run final compaction
+	subtrees, leaves, bytesDeleted := archiver.Stats()
+
+	if !dryRun && subtrees > 0 {
+		log.Info("Running final database compaction")
+		if err := chaindb.Compact(nil, nil); err != nil {
+			log.Warn("Final compaction failed", "err", err)
+		}
+	}
+
+	// 6. Print summary
+	var archiveSize uint64
+	if writer != nil {
+		archiveSize = writer.Offset()
+	}
+
+	log.Info("Archive generation complete",
+		"subtrees", subtrees,
+		"leaves", leaves,
+		"bytesDeleted", bytesDeleted,
+		"archiveSize", archiveSize,
+		"elapsed", common.PrettyDuration(time.Since(start)))
+
+	if dryRun {
+		log.Info("This was a dry run - no changes were made to the database")
+	}
+
+	return nil
+}
+
+// parseArchiveRoot converts the state-root command line argument into a hash.
+// The "0x" prefix is optional. Unlike common.HexToHash, which silently crops or
+// pads malformed input, anything that is not exactly one hex-encoded hash is
+// rejected with an error.
+func parseArchiveRoot(arg string) (common.Hash, error) {
+	text := arg
+	if len(text) < 2 || text[0] != '0' || (text[1] != 'x' && text[1] != 'X') {
+		text = "0x" + text
+	}
+	var root common.Hash
+	if err := root.UnmarshalText([]byte(text)); err != nil {
+		return common.Hash{}, fmt.Errorf("invalid state root %q: %w", arg, err)
+	}
+	return root, nil
+}
+
+// cycleCheckScheme returns the state scheme for the database.
+// It's a helper to check what scheme is in use. ctx is currently unused.
+func cycleCheckScheme(ctx *cli.Context, db ethdb.Database) string {
+	return rawdb.ReadStateScheme(db)
+}
diff --git a/cmd/geth/main.go b/cmd/geth/main.go
index e547256e00..0c9e71c8a1 100644
--- a/cmd/geth/main.go
+++ b/cmd/geth/main.go
@@ -239,6 +239,8 @@ func init() {
 		dumpConfigCommand,
 		// see dbcmd.go
 		dbCommand,
+		// See archivecmd.go
+		archiveCommand,
 		// See cmd/utils/flags_legacy.go
 		utils.ShowDeprecated,
 		// See snapshot.go
diff --git a/trie/archive/writer.go b/trie/archive/writer.go
new file mode 100644
index 0000000000..98b4ecce4b
--- /dev/null
+++ b/trie/archive/writer.go
@@ -0,0 +1,92 @@
+// Copyright 2026 go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+ +package archive + +import ( + "os" + "sync" + + "github.com/ethereum/go-ethereum/rlp" +) + +// ArchiveWriter is an append-only writer for archive files. +// It writes RLP-encoded records to a file and tracks the current offset. +type ArchiveWriter struct { + file *os.File + offset uint64 + mu sync.Mutex +} + +// NewArchiveWriter creates a new archive writer that appends to the given file. +// If the file exists, it will be opened in append mode and writing continues +// from the current end of file. If it doesn't exist, it will be created. +func NewArchiveWriter(path string) (*ArchiveWriter, error) { + file, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return nil, err + } + info, err := file.Stat() + if err != nil { + file.Close() + return nil, err + } + return &ArchiveWriter{ + file: file, + offset: uint64(info.Size()), + }, nil +} + +// WriteSubtree writes all records belonging to a subtree and returns +// the starting offset and total size of the written data. +// This is the atomic unit of archival - all records for a subtree are +// written together and can be retrieved together using the returned +// offset and size. +func (w *ArchiveWriter) WriteSubtree(records []*Record) (offset uint64, size uint64, err error) { + w.mu.Lock() + defer w.mu.Unlock() + + startOffset := w.offset + for _, rec := range records { + encoded, err := rlp.EncodeToBytes(rec) + if err != nil { + return 0, 0, err + } + if _, err := w.file.Write(encoded); err != nil { + return 0, 0, err + } + w.offset += uint64(len(encoded)) + } + return startOffset, w.offset - startOffset, nil +} + +// Sync flushes the file to disk. This should be called after writing +// a subtree and before modifying the database to ensure crash consistency. +func (w *ArchiveWriter) Sync() error { + return w.file.Sync() +} + +// Close closes the archive file. 
+func (w *ArchiveWriter) Close() error {
+	// Plain close of the underlying file: it neither takes w.mu nor calls
+	// Sync first, so callers are expected to Sync what they need beforehand.
+	return w.file.Close()
+}
+
+// Offset returns the current write offset in the file.
+// It is read under w.mu, the same lock WriteSubtree holds while writing.
+func (w *ArchiveWriter) Offset() uint64 {
+	w.mu.Lock()
+	defer w.mu.Unlock()
+	return w.offset
+}
diff --git a/trie/archiver.go b/trie/archiver.go
new file mode 100644
index 0000000000..0814da10e8
--- /dev/null
+++ b/trie/archiver.go
@@ -0,0 +1,403 @@
+// Copyright 2026 go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package trie
+
+import (
+	"encoding/binary"
+	"fmt"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/core/rawdb"
+	"github.com/ethereum/go-ethereum/core/types"
+	"github.com/ethereum/go-ethereum/ethdb"
+	"github.com/ethereum/go-ethereum/log"
+	"github.com/ethereum/go-ethereum/rlp"
+	"github.com/ethereum/go-ethereum/trie/archive"
+	"github.com/ethereum/go-ethereum/triedb/database"
+)
+
+// subtreeInfo holds information about a subtree to be archived.
+// It contains all the data needed to write the subtree to an archive
+// and replace it with an expiredNode in the database.
+type subtreeInfo struct { + path []byte // Hex-encoded path to subtree root + owner common.Hash // Zero for account trie, account hash for storage + height int // Height of subtree (from leaves) + leaves []*archive.Record // All leaf records (relative path + encoded node) + nodePaths [][]byte // Paths of all nodes to delete +} + +// Archiver handles the archival process of trie nodes. +// It walks the state trie, identifies subtrees at height 3, +// archives their leaf data, and replaces them with expiredNode markers. +type Archiver struct { + db ethdb.Database + triedb database.NodeDatabase + writer *archive.ArchiveWriter + compactionInterval uint64 + dryRun bool + stateRoot common.Hash + + // Progress tracking + subtreesArchived uint64 + bytesDeleted uint64 + leavesArchived uint64 + lastCompaction uint64 +} + +// NewArchiver creates a new archiver instance. +// +// Parameters: +// - db: The underlying key-value database +// - triedb: The trie database for reading nodes +// - writer: Archive file writer (can be nil for dry run) +// - compactionInterval: Run compaction after this many subtrees (0 = disable) +// - dryRun: If true, don't modify the database +func NewArchiver(db ethdb.Database, triedb database.NodeDatabase, + writer *archive.ArchiveWriter, compactionInterval uint64, dryRun bool) *Archiver { + return &Archiver{ + db: db, + triedb: triedb, + writer: writer, + compactionInterval: compactionInterval, + dryRun: dryRun, + } +} + +// ProcessState archives subtrees from the given state root. +// It processes the account trie first, then all storage tries. 
+func (a *Archiver) ProcessState(root common.Hash) error { + a.stateRoot = root + + // Process account trie (owner = zero hash) + log.Info("Processing account trie", "root", root) + accountTrie, err := New(StateTrieID(root), a.triedb) + if err != nil { + return fmt.Errorf("failed to open account trie: %w", err) + } + + if err := a.processTrie(common.Hash{}, accountTrie); err != nil { + return fmt.Errorf("failed to process account trie: %w", err) + } + + // Process storage tries for accounts with storage + log.Info("Processing storage tries") + iter, err := accountTrie.NodeIterator(nil) + if err != nil { + return fmt.Errorf("failed to create account iterator: %w", err) + } + + kvIter := NewIterator(iter) + for kvIter.Next() { + // Decode the account to check for storage + var acc types.StateAccount + if err := rlp.DecodeBytes(kvIter.Value, &acc); err != nil { + log.Warn("Failed to decode account", "err", err) + continue + } + if acc.Root == types.EmptyRootHash { + continue + } + + // Process this account's storage trie + accountHash := common.BytesToHash(kvIter.Key) + storageID := StorageTrieID(root, accountHash, acc.Root) + storageTrie, err := New(storageID, a.triedb) + if err != nil { + log.Warn("Failed to open storage trie", "account", accountHash, "err", err) + continue + } + + if err := a.processTrie(accountHash, storageTrie); err != nil { + log.Warn("Failed to process storage trie", "account", accountHash, "err", err) + } + } + + if kvIter.Err != nil { + return fmt.Errorf("account iteration error: %w", kvIter.Err) + } + + return nil +} + +// processTrie finds and archives all height-3 subtrees in the trie. 
+func (a *Archiver) processTrie(owner common.Hash, t *Trie) error { + if t.root == nil { + return nil + } + + subtrees := a.findHeight3Subtrees(t.root, nil, owner) + log.Info("Found subtrees to archive", "owner", owner, "count", len(subtrees)) + + for _, info := range subtrees { + if err := a.archiveSubtree(info); err != nil { + log.Warn("Failed to archive subtree", "path", common.Bytes2Hex(info.path), "err", err) + continue + } + a.subtreesArchived++ + a.leavesArchived += uint64(len(info.leaves)) + + if err := a.maybeCompact(); err != nil { + log.Warn("Compaction failed", "err", err) + } + } + return nil +} + +// findHeight3Subtrees recursively finds all subtrees with height == 3. +// Height is measured from leaves: leaves=0, their parents=1, etc. +func (a *Archiver) findHeight3Subtrees(n node, path []byte, owner common.Hash) []*subtreeInfo { + info := a.computeSubtreeInfo(n, path, owner) + if info == nil { + return nil + } + + // If this subtree has height 3, it's a candidate for archival + if info.height == 3 { + return []*subtreeInfo{info} + } + + // If height > 3, recurse into children to find height-3 subtrees + if info.height > 3 { + var results []*subtreeInfo + switch n := n.(type) { + case *fullNode: + for i, child := range n.Children[:16] { + if child != nil { + childPath := append(append([]byte{}, path...), byte(i)) + results = append(results, a.findHeight3Subtrees(child, childPath, owner)...) + } + } + case *shortNode: + childPath := append(append([]byte{}, path...), n.Key...) + results = append(results, a.findHeight3Subtrees(n.Val, childPath, owner)...) + case hashNode: + // Resolve and recurse + resolved, err := a.resolveNode(n, path, owner) + if err == nil { + results = append(results, a.findHeight3Subtrees(resolved, path, owner)...) + } + } + return results + } + + // Height < 3: no archivable subtrees here + return nil +} + +// computeSubtreeInfo computes height and collects leaves for a subtree. 
+// Returns nil if the node is nil or an error occurs during resolution. +func (a *Archiver) computeSubtreeInfo(n node, path []byte, owner common.Hash) *subtreeInfo { + switch n := n.(type) { + case nil: + return nil + + case valueNode: + // Leaf: height 0 + // Encode the leaf as a shortNode for archive storage + return &subtreeInfo{ + path: copyBytes(path), + owner: owner, + height: 0, + leaves: []*archive.Record{{ + Path: nil, // Empty relative path for leaf at root + Value: []byte(n), + }}, + nodePaths: [][]byte{copyBytes(path)}, + } + + case *shortNode: + childPath := append(append([]byte{}, path...), n.Key...) + childInfo := a.computeSubtreeInfo(n.Val, childPath, owner) + if childInfo == nil { + return nil + } + + // Adjust relative paths in leaves to include this node's key + for _, leaf := range childInfo.leaves { + leaf.Path = append(append([]byte{}, n.Key...), leaf.Path...) + } + + return &subtreeInfo{ + path: copyBytes(path), + owner: owner, + height: childInfo.height + 1, + leaves: childInfo.leaves, + nodePaths: append([][]byte{copyBytes(path)}, childInfo.nodePaths...), + } + + case *fullNode: + var ( + maxHeight = 0 + allLeaves []*archive.Record + allPaths = [][]byte{copyBytes(path)} + ) + for i, child := range n.Children[:16] { + if child != nil { + childPath := append(append([]byte{}, path...), byte(i)) + childInfo := a.computeSubtreeInfo(child, childPath, owner) + if childInfo != nil { + if childInfo.height+1 > maxHeight { + maxHeight = childInfo.height + 1 + } + // Adjust relative paths to include the branch index + for _, leaf := range childInfo.leaves { + leaf.Path = append([]byte{byte(i)}, leaf.Path...) + } + allLeaves = append(allLeaves, childInfo.leaves...) + allPaths = append(allPaths, childInfo.nodePaths...) 
+ } + } + } + + if len(allLeaves) == 0 { + return nil + } + + return &subtreeInfo{ + path: copyBytes(path), + owner: owner, + height: maxHeight, + leaves: allLeaves, + nodePaths: allPaths, + } + + case hashNode: + resolved, err := a.resolveNode(n, path, owner) + if err != nil { + log.Debug("Failed to resolve hashNode", "path", common.Bytes2Hex(path), "err", err) + return nil + } + return a.computeSubtreeInfo(resolved, path, owner) + + case *expiredNode: + // Already archived, skip + return nil + } + return nil +} + +// archiveSubtree writes leaves to archive and replaces subtree with expiredNode. +func (a *Archiver) archiveSubtree(info *subtreeInfo) error { + if a.dryRun { + log.Info("Would archive subtree", + "path", common.Bytes2Hex(info.path), + "owner", info.owner, + "height", info.height, + "leaves", len(info.leaves), + "nodes", len(info.nodePaths)) + return nil + } + + // 1. Write to archive file + offset, size, err := a.writer.WriteSubtree(info.leaves) + if err != nil { + return fmt.Errorf("failed to write subtree to archive: %w", err) + } + + // 2. Sync to ensure durability before modifying DB + if err := a.writer.Sync(); err != nil { + return fmt.Errorf("failed to sync archive: %w", err) + } + + // 3. 
Batch database operations + batch := a.db.NewBatch() + + // Delete all nodes in subtree (except the root which we'll overwrite) + for _, nodePath := range info.nodePaths[1:] { // Skip first (root) + if info.owner == (common.Hash{}) { + rawdb.DeleteAccountTrieNode(batch, nodePath) + } else { + rawdb.DeleteStorageTrieNode(batch, info.owner, nodePath) + } + a.bytesDeleted += uint64(len(nodePath)) + } + + // Write expiredNode at subtree root + expiredBlob := encodeExpiredNodeBlob(offset, size) + if info.owner == (common.Hash{}) { + rawdb.WriteAccountTrieNode(batch, info.path, expiredBlob) + } else { + rawdb.WriteStorageTrieNode(batch, info.owner, info.path, expiredBlob) + } + + if err := batch.Write(); err != nil { + return fmt.Errorf("failed to write batch: %w", err) + } + + log.Debug("Archived subtree", + "path", common.Bytes2Hex(info.path), + "owner", info.owner, + "leaves", len(info.leaves), + "offset", offset, + "size", size) + + return nil +} + +// maybeCompact runs database compaction if the threshold is reached. +func (a *Archiver) maybeCompact() error { + if a.compactionInterval == 0 { + return nil + } + if a.subtreesArchived-a.lastCompaction >= a.compactionInterval { + log.Info("Running database compaction", "subtrees", a.subtreesArchived) + if err := a.db.Compact(nil, nil); err != nil { + return err + } + a.lastCompaction = a.subtreesArchived + } + return nil +} + +// resolveNode resolves a hashNode to its actual node content. +func (a *Archiver) resolveNode(hash hashNode, path []byte, owner common.Hash) (node, error) { + reader, err := a.triedb.NodeReader(a.stateRoot) + if err != nil { + return nil, err + } + blob, err := reader.Node(owner, path, common.BytesToHash(hash)) + if err != nil { + return nil, err + } + return decodeNodeUnsafe(hash, blob) +} + +// encodeExpiredNodeBlob creates the raw bytes for an expiredNode. 
+// Format: 1-byte marker (0x00) + 8-byte offset + 8-byte size = 17 bytes +func encodeExpiredNodeBlob(offset, size uint64) []byte { + buf := make([]byte, 1+2*archive.OffsetSize) // 17 bytes + buf[0] = expiredNodeMarker // 0x00 + binary.BigEndian.PutUint64(buf[1:], offset) + binary.BigEndian.PutUint64(buf[1+archive.OffsetSize:], size) + return buf +} + +// Stats returns archival statistics. +func (a *Archiver) Stats() (subtrees, leaves, bytesDeleted uint64) { + return a.subtreesArchived, a.leavesArchived, a.bytesDeleted +} + +// copyBytes returns a copy of the given byte slice. +func copyBytes(b []byte) []byte { + if b == nil { + return nil + } + c := make([]byte, len(b)) + copy(c, b) + return c +}