mirror of
https://github.com/ethereum/go-ethereum.git
synced 2026-06-19 13:21:37 +00:00
Merge a764b36449 into 7c9032dff6
This commit is contained in:
commit
8b6f607c4e
19 changed files with 2473 additions and 13 deletions
576
cmd/geth/archivecmd.go
Normal file
576
cmd/geth/archivecmd.go
Normal file
|
|
@ -0,0 +1,576 @@
|
|||
// Copyright 2026 The go-ethereum Authors
|
||||
// This file is part of go-ethereum.
|
||||
//
|
||||
// go-ethereum is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// go-ethereum is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with go-ethereum. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"time"
|
||||
|
||||
"github.com/ethereum/go-ethereum/cmd/utils"
|
||||
"github.com/ethereum/go-ethereum/common"
|
||||
"github.com/ethereum/go-ethereum/core/rawdb"
|
||||
"github.com/ethereum/go-ethereum/core/types"
|
||||
"github.com/ethereum/go-ethereum/crypto"
|
||||
"github.com/ethereum/go-ethereum/ethdb"
|
||||
"github.com/ethereum/go-ethereum/log"
|
||||
"github.com/ethereum/go-ethereum/rlp"
|
||||
"github.com/ethereum/go-ethereum/trie"
|
||||
"github.com/ethereum/go-ethereum/trie/archive"
|
||||
"github.com/ethereum/go-ethereum/triedb/database"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
var (
|
||||
// Flags for the archive command
|
||||
archiveOutputFlag = &cli.StringFlag{
|
||||
Name: "output",
|
||||
Usage: "Path to archive output file",
|
||||
Value: "", // Default: <datadir>/nodearchive
|
||||
}
|
||||
archiveCompactionIntervalFlag = &cli.Uint64Flag{
|
||||
Name: "compaction-interval",
|
||||
Usage: "Run compaction after this many subtrees (0 = disable)",
|
||||
Value: 1000,
|
||||
}
|
||||
archiveDryRunFlag = &cli.BoolFlag{
|
||||
Name: "dry-run",
|
||||
Usage: "Simulate without modifying database",
|
||||
}
|
||||
|
||||
// Commands
|
||||
archiveCheckNodeFlag = &cli.StringFlag{
|
||||
Name: "owner",
|
||||
Usage: "Owner hash (hex) for the trie node to check",
|
||||
}
|
||||
archiveCheckPathFlag = &cli.StringFlag{
|
||||
Name: "path",
|
||||
Usage: "Path (hex nibbles) of the trie node to check",
|
||||
}
|
||||
|
||||
archiveCommand = &cli.Command{
|
||||
Name: "archive",
|
||||
Usage: "Archive state trie nodes to reduce database size",
|
||||
Subcommands: []*cli.Command{
|
||||
archiveGenerateCmd,
|
||||
archiveVerifyCmd,
|
||||
archiveDeleteJournalCmd,
|
||||
archiveCheckNodeCmd,
|
||||
},
|
||||
}
|
||||
|
||||
archiveCheckNodeCmd = &cli.Command{
|
||||
Name: "check-node",
|
||||
Usage: "Check if a specific trie node exists in the raw DB",
|
||||
Action: archiveCheckNode,
|
||||
Flags: slices.Concat([]cli.Flag{
|
||||
archiveCheckNodeFlag,
|
||||
archiveCheckPathFlag,
|
||||
}, utils.NetworkFlags, utils.DatabaseFlags),
|
||||
}
|
||||
|
||||
archiveDeleteJournalCmd = &cli.Command{
|
||||
Name: "delete-journal",
|
||||
Usage: "Delete the pathdb journal to force a clean restart",
|
||||
Action: archiveDeleteJournal,
|
||||
Flags: slices.Concat(utils.NetworkFlags, utils.DatabaseFlags),
|
||||
Description: `
|
||||
Deletes the pathdb journal (TrieJournal key and merkle.journal file) from the
|
||||
database. This forces geth to restart with a bare disk layer, discarding any
|
||||
in-memory diff layers that may be inconsistent with archived state.
|
||||
|
||||
Use this after running 'archive generate' if geth was started in between and
|
||||
recreated the journal.
|
||||
|
||||
Examples:
|
||||
geth archive delete-journal --datadir /path/to/datadir
|
||||
geth archive delete-journal --hoodi
|
||||
`,
|
||||
}
|
||||
|
||||
archiveVerifyCmd = &cli.Command{
|
||||
Name: "verify",
|
||||
Usage: "Verify all archived nodes can be correctly resurrected",
|
||||
Action: archiveVerify,
|
||||
Flags: slices.Concat(utils.NetworkFlags, utils.DatabaseFlags),
|
||||
Description: `
|
||||
Walks the entire state trie, resolving every expired node from the archive
|
||||
file and verifying that the reconstructed subtree hash matches the original.
|
||||
Also walks all storage tries referenced by accounts.
|
||||
|
||||
The database is opened read-only. No modifications are made.
|
||||
|
||||
Examples:
|
||||
geth archive verify --datadir /path/to/datadir
|
||||
geth archive verify --hoodi
|
||||
`,
|
||||
}
|
||||
|
||||
archiveGenerateCmd = &cli.Command{
|
||||
Name: "generate",
|
||||
Usage: "Generate archive files from height-3 subtrees",
|
||||
ArgsUsage: "[state-root]",
|
||||
Action: archiveGenerate,
|
||||
Flags: slices.Concat([]cli.Flag{
|
||||
archiveOutputFlag,
|
||||
archiveCompactionIntervalFlag,
|
||||
archiveDryRunFlag,
|
||||
}, utils.NetworkFlags, utils.DatabaseFlags),
|
||||
Description: `
|
||||
Walks the state trie of the specified root (or head block) and archives
|
||||
subtrees at height 3. Each archived subtree is replaced with an expiredNode
|
||||
that references the archive file offset and size.
|
||||
|
||||
Height is measured from leaves: leaves=0, parents=1, etc. A height-3 node
|
||||
has leaves at most 3 levels below it.
|
||||
|
||||
The archiver reads trie nodes directly from the persistent database layer,
|
||||
bypassing any in-memory diff layers. This ensures consistency between the
|
||||
data it reads and the data it modifies.
|
||||
|
||||
Examples:
|
||||
# Archive from the persistent disk state
|
||||
geth archive generate --datadir /path/to/datadir
|
||||
|
||||
# Dry run to see what would be archived
|
||||
geth archive generate --dry-run --datadir /path/to/datadir
|
||||
|
||||
# Custom output and compaction interval
|
||||
geth archive generate --output /path/to/archive --compaction-interval 500
|
||||
`,
|
||||
}
|
||||
)
|
||||
|
||||
// rawDBNodeReader implements database.NodeReader by reading trie nodes directly
|
||||
// from the raw key-value database, bypassing pathdb's in-memory diff layers.
|
||||
// This ensures the archiver sees the same trie state it modifies.
|
||||
type rawDBNodeReader struct {
|
||||
db ethdb.KeyValueReader
|
||||
}
|
||||
|
||||
func (r *rawDBNodeReader) Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, error) {
|
||||
var blob []byte
|
||||
if owner == (common.Hash{}) {
|
||||
blob = rawdb.ReadAccountTrieNode(r.db, path)
|
||||
} else {
|
||||
blob = rawdb.ReadStorageTrieNode(r.db, owner, path)
|
||||
}
|
||||
// Skip hash verification: the raw DB may contain expiredNode markers
|
||||
// (blob[0] == 0x00) which have different hashes than the original nodes.
|
||||
return blob, nil
|
||||
}
|
||||
|
||||
// rawDBNodeDatabase implements database.NodeDatabase using direct raw DB reads.
|
||||
type rawDBNodeDatabase struct {
|
||||
db ethdb.KeyValueReader
|
||||
root common.Hash
|
||||
}
|
||||
|
||||
func (d *rawDBNodeDatabase) NodeReader(stateRoot common.Hash) (database.NodeReader, error) {
|
||||
// Only allow reading the persistent disk root state
|
||||
if stateRoot != d.root {
|
||||
return nil, fmt.Errorf("raw DB reader only supports disk root %x, got %x", d.root, stateRoot)
|
||||
}
|
||||
return &rawDBNodeReader{db: d.db}, nil
|
||||
}
|
||||
|
||||
func archiveGenerate(ctx *cli.Context) error {
|
||||
// 1. Setup node and databases
|
||||
stack, _ := makeConfigNode(ctx)
|
||||
defer stack.Close()
|
||||
|
||||
dryRun := ctx.Bool(archiveDryRunFlag.Name)
|
||||
chaindb := utils.MakeChainDatabase(ctx, stack, dryRun)
|
||||
defer chaindb.Close()
|
||||
|
||||
// Check state scheme - we only support PathDB
|
||||
scheme := cycleCheckScheme(ctx, chaindb)
|
||||
if scheme != rawdb.PathScheme {
|
||||
return fmt.Errorf("archive generation requires path-based state scheme, got: %s", scheme)
|
||||
}
|
||||
|
||||
// 2. Flush diff layers to disk via pathdb. This ensures the raw DB
|
||||
// contains the complete, up-to-date state trie and that state history
|
||||
// entries are properly written to the freezer.
|
||||
trieDB := utils.MakeTrieDatabase(ctx, stack, chaindb, false, dryRun, false)
|
||||
head, hasDiff := trieDB.DiffHead()
|
||||
if hasDiff {
|
||||
log.Info("Flushing diff layers to disk", "head", head)
|
||||
if err := trieDB.Commit(head, true); err != nil {
|
||||
trieDB.Close()
|
||||
return fmt.Errorf("failed to flush diff layers: %w", err)
|
||||
}
|
||||
log.Info("Diff layers flushed successfully")
|
||||
} else {
|
||||
log.Info("No diff layers to flush, disk state is current", "root", head)
|
||||
}
|
||||
// Close triedb — we work directly with raw DB for archival.
|
||||
// We'll re-open it at the end to write a fresh journal.
|
||||
trieDB.Close()
|
||||
|
||||
// 3. Determine the disk state root (now up-to-date after flush).
|
||||
rootBlob := rawdb.ReadAccountTrieNode(chaindb, nil)
|
||||
if len(rootBlob) == 0 {
|
||||
return errors.New("state trie not found in database")
|
||||
}
|
||||
root := crypto.Keccak256Hash(rootBlob)
|
||||
log.Info("Using disk state root", "root", root)
|
||||
|
||||
// Create a raw DB node reader that bypasses pathdb layers
|
||||
nodeDB := &rawDBNodeDatabase{db: chaindb, root: root}
|
||||
|
||||
// 4. Open archive writer (unless dry-run).
|
||||
// The archive file is placed at <datadir>/geth/nodearchive by default,
|
||||
// matching the path used by ArchivedNodeResolver when reading back.
|
||||
var writer *archive.ArchiveWriter
|
||||
archivePath := ctx.String(archiveOutputFlag.Name)
|
||||
if archivePath == "" {
|
||||
archivePath = filepath.Join(stack.ResolvePath(""), "nodearchive")
|
||||
}
|
||||
|
||||
if !dryRun {
|
||||
var err error
|
||||
writer, err = archive.NewArchiveWriter(archivePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open archive file %s: %w", archivePath, err)
|
||||
}
|
||||
defer writer.Close()
|
||||
log.Info("Opened archive file", "path", archivePath)
|
||||
} else {
|
||||
log.Info("Dry run mode - no changes will be made")
|
||||
}
|
||||
|
||||
// 5. Create and run archiver
|
||||
archiver := trie.NewArchiver(
|
||||
chaindb,
|
||||
nodeDB,
|
||||
writer,
|
||||
ctx.Uint64(archiveCompactionIntervalFlag.Name),
|
||||
dryRun,
|
||||
)
|
||||
|
||||
start := time.Now()
|
||||
if err := archiver.ProcessState(root); err != nil {
|
||||
return fmt.Errorf("archive generation failed: %w", err)
|
||||
}
|
||||
|
||||
// 6. Get stats and optionally run final compaction
|
||||
subtrees, leaves, bytesDeleted := archiver.Stats()
|
||||
|
||||
if !dryRun && subtrees > 0 {
|
||||
log.Info("Running final database compaction")
|
||||
if err := chaindb.Compact(nil, nil); err != nil {
|
||||
log.Warn("Final compaction failed", "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
// 7. Re-journal the pathdb state with the current disk root.
|
||||
// After archiving, some trie nodes have been replaced with expired
|
||||
// markers. We re-open pathdb and write a fresh journal (disk layer
|
||||
// only, since all diff layers were flushed in step 2) so that geth
|
||||
// can restart cleanly.
|
||||
if !dryRun {
|
||||
log.Info("Re-journaling pathdb state")
|
||||
freshTrieDB := utils.MakeTrieDatabase(ctx, stack, chaindb, false, false, false)
|
||||
freshRoot := crypto.Keccak256Hash(rawdb.ReadAccountTrieNode(chaindb, nil))
|
||||
if err := freshTrieDB.Journal(freshRoot); err != nil {
|
||||
log.Warn("Failed to re-journal pathdb state", "err", err)
|
||||
}
|
||||
freshTrieDB.Close()
|
||||
}
|
||||
|
||||
// 8. Print summary
|
||||
var archiveSize uint64
|
||||
if writer != nil {
|
||||
archiveSize = writer.Offset()
|
||||
}
|
||||
|
||||
log.Info("Archive generation complete",
|
||||
"subtrees", subtrees,
|
||||
"leaves", leaves,
|
||||
"bytesDeleted", bytesDeleted,
|
||||
"archiveSize", archiveSize,
|
||||
"elapsed", common.PrettyDuration(time.Since(start)))
|
||||
|
||||
if dryRun {
|
||||
log.Info("This was a dry run - no changes were made to the database")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func archiveVerify(ctx *cli.Context) error {
|
||||
stack, _ := makeConfigNode(ctx)
|
||||
defer stack.Close()
|
||||
|
||||
// Open database read-only
|
||||
chaindb := utils.MakeChainDatabase(ctx, stack, true)
|
||||
defer chaindb.Close()
|
||||
|
||||
scheme := cycleCheckScheme(ctx, chaindb)
|
||||
if scheme != rawdb.PathScheme {
|
||||
return fmt.Errorf("archive verify requires path-based state scheme, got: %s", scheme)
|
||||
}
|
||||
|
||||
// Set archive data dir so ArchivedNodeResolver can find the file
|
||||
// ResolvePath("") returns the node's data directory (e.g. .ethereum/hoodi/geth),
|
||||
// but ArchivedNodeResolver expects the instance directory (.ethereum/hoodi)
|
||||
// since it appends "geth/nodearchive" itself.
|
||||
archive.ArchiveDataDir = filepath.Dir(stack.ResolvePath(""))
|
||||
|
||||
// Compute disk root
|
||||
rootBlob := rawdb.ReadAccountTrieNode(chaindb, nil)
|
||||
if len(rootBlob) == 0 {
|
||||
return errors.New("state trie not found in database")
|
||||
}
|
||||
root := crypto.Keccak256Hash(rootBlob)
|
||||
log.Info("Verifying archived nodes", "root", root)
|
||||
|
||||
nodeDB := &rawDBNodeDatabase{db: chaindb, root: root}
|
||||
|
||||
// Open account trie
|
||||
accountTrie, err := trie.New(trie.StateTrieID(root), nodeDB)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open account trie: %w", err)
|
||||
}
|
||||
|
||||
var (
|
||||
totalAccounts int
|
||||
totalStorageTries int
|
||||
totalLeaves int
|
||||
totalExpired int
|
||||
totalErrors int
|
||||
start = time.Now()
|
||||
lastLog = time.Now()
|
||||
)
|
||||
|
||||
// Walk the account trie — this resolves all expired nodes and verifies hashes
|
||||
accountStats, err := accountTrie.Walk(func(path []byte, value []byte) error {
|
||||
totalAccounts++
|
||||
if time.Since(lastLog) > 30*time.Second {
|
||||
log.Info("Verification progress",
|
||||
"accounts", totalAccounts,
|
||||
"storageTries", totalStorageTries,
|
||||
"leaves", totalLeaves,
|
||||
"expired", totalExpired,
|
||||
"errors", totalErrors)
|
||||
lastLog = time.Now()
|
||||
}
|
||||
|
||||
// Decode account to check for storage trie
|
||||
var acc types.StateAccount
|
||||
if err := rlp.DecodeBytes(value, &acc); err != nil {
|
||||
log.Warn("Failed to decode account", "err", err)
|
||||
totalErrors++
|
||||
return nil // continue walking
|
||||
}
|
||||
if acc.Root == types.EmptyRootHash {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Open and walk storage trie.
|
||||
// path is hex-nibble encoded (with a 16 terminator from the trie key),
|
||||
// so convert nibble pairs back to the 32-byte account hash.
|
||||
nibbles := path
|
||||
if len(nibbles) > 0 && nibbles[len(nibbles)-1] == 16 {
|
||||
nibbles = nibbles[:len(nibbles)-1]
|
||||
}
|
||||
keyBytes := make([]byte, len(nibbles)/2)
|
||||
for i := 0; i < len(nibbles); i += 2 {
|
||||
keyBytes[i/2] = nibbles[i]<<4 | nibbles[i+1]
|
||||
}
|
||||
accountHash := common.BytesToHash(keyBytes)
|
||||
storageID := trie.StorageTrieID(root, accountHash, acc.Root)
|
||||
storageTrie, err := trie.New(storageID, nodeDB)
|
||||
if err != nil {
|
||||
log.Warn("Failed to open storage trie", "account", accountHash, "err", err)
|
||||
totalErrors++
|
||||
return nil
|
||||
}
|
||||
|
||||
storageStats, err := storageTrie.Walk(func(spath []byte, svalue []byte) error {
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
log.Warn("Storage trie walk failed", "account", accountHash, "err", err)
|
||||
totalErrors++
|
||||
return nil
|
||||
}
|
||||
totalStorageTries++
|
||||
totalLeaves += storageStats.Leaves
|
||||
totalExpired += storageStats.ExpiredResolved
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("account trie walk failed: %w", err)
|
||||
}
|
||||
|
||||
totalLeaves += accountStats.Leaves
|
||||
totalExpired += accountStats.ExpiredResolved
|
||||
|
||||
log.Info("Archive verification complete",
|
||||
"accounts", totalAccounts,
|
||||
"storageTries", totalStorageTries,
|
||||
"totalLeaves", totalLeaves,
|
||||
"expiredResolved", totalExpired,
|
||||
"errors", totalErrors,
|
||||
"elapsed", common.PrettyDuration(time.Since(start)))
|
||||
|
||||
if totalErrors > 0 {
|
||||
return fmt.Errorf("verification completed with %d errors", totalErrors)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func archiveDeleteJournal(ctx *cli.Context) error {
|
||||
stack, _ := makeConfigNode(ctx)
|
||||
defer stack.Close()
|
||||
|
||||
chaindb := utils.MakeChainDatabase(ctx, stack, false)
|
||||
defer chaindb.Close()
|
||||
|
||||
// Delete the pathdb journal KV key
|
||||
if err := chaindb.Delete([]byte("TrieJournal")); err != nil {
|
||||
log.Warn("Failed to delete pathdb journal key", "err", err)
|
||||
} else {
|
||||
log.Info("Deleted pathdb journal key (TrieJournal)")
|
||||
}
|
||||
|
||||
// Delete the journal file(s) - check both legacy and current locations
|
||||
for _, dir := range []string{"triedb", ""} {
|
||||
for _, name := range []string{"merkle.journal", "verkle.journal"} {
|
||||
journalFile := filepath.Join(stack.ResolvePath(dir), name)
|
||||
if err := os.Remove(journalFile); err == nil {
|
||||
log.Info("Deleted journal file", "path", journalFile)
|
||||
} else if !os.IsNotExist(err) {
|
||||
log.Warn("Failed to delete journal file", "path", journalFile, "err", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func archiveCheckNode(ctx *cli.Context) error {
|
||||
stack, _ := makeConfigNode(ctx)
|
||||
defer stack.Close()
|
||||
|
||||
chaindb := utils.MakeChainDatabase(ctx, stack, true)
|
||||
defer chaindb.Close()
|
||||
|
||||
ownerHex := ctx.String(archiveCheckNodeFlag.Name)
|
||||
pathHex := ctx.String(archiveCheckPathFlag.Name)
|
||||
|
||||
if ownerHex == "" {
|
||||
return errors.New("--owner flag is required")
|
||||
}
|
||||
|
||||
owner := common.HexToHash(ownerHex)
|
||||
|
||||
// Parse path: hex nibbles like "08" → []byte{0, 8}
|
||||
var path []byte
|
||||
for _, c := range pathHex {
|
||||
var nibble byte
|
||||
switch {
|
||||
case c >= '0' && c <= '9':
|
||||
nibble = byte(c - '0')
|
||||
case c >= 'a' && c <= 'f':
|
||||
nibble = byte(c-'a') + 10
|
||||
case c >= 'A' && c <= 'F':
|
||||
nibble = byte(c-'A') + 10
|
||||
default:
|
||||
return fmt.Errorf("invalid hex char in path: %c", c)
|
||||
}
|
||||
path = append(path, nibble)
|
||||
}
|
||||
|
||||
log.Info("Checking node in raw DB", "owner", owner, "path", fmt.Sprintf("%x", path))
|
||||
|
||||
// Read the node directly from the raw DB
|
||||
isAccount := owner == (common.Hash{})
|
||||
|
||||
// Check the target path and all prefixes up to root
|
||||
for i := len(path); i >= 0; i-- {
|
||||
subpath := path[:i]
|
||||
var blob []byte
|
||||
if isAccount {
|
||||
blob = rawdb.ReadAccountTrieNode(chaindb, subpath)
|
||||
} else {
|
||||
blob = rawdb.ReadStorageTrieNode(chaindb, owner, subpath)
|
||||
}
|
||||
|
||||
status := "MISSING"
|
||||
details := ""
|
||||
if len(blob) > 0 {
|
||||
if blob[0] == 0x00 {
|
||||
status = "EXPIRED"
|
||||
if len(blob) == 17 {
|
||||
offset := binary.BigEndian.Uint64(blob[1:9])
|
||||
size := binary.BigEndian.Uint64(blob[9:17])
|
||||
details = fmt.Sprintf("offset=%d size=%d", offset, size)
|
||||
}
|
||||
} else {
|
||||
status = fmt.Sprintf("PRESENT (%d bytes, first=0x%02x)", len(blob), blob[0])
|
||||
}
|
||||
}
|
||||
label := "prefix"
|
||||
if i == len(path) {
|
||||
label = "TARGET"
|
||||
}
|
||||
if i == 0 {
|
||||
label = "ROOT"
|
||||
}
|
||||
log.Info("Node check",
|
||||
"label", label,
|
||||
"path", fmt.Sprintf("%x", subpath),
|
||||
"pathLen", i,
|
||||
"status", status,
|
||||
"details", details)
|
||||
}
|
||||
|
||||
// Also check a few child paths to see what's below the target
|
||||
for nibble := byte(0); nibble < 16; nibble++ {
|
||||
childPath := append(append([]byte{}, path...), nibble)
|
||||
var blob []byte
|
||||
if isAccount {
|
||||
blob = rawdb.ReadAccountTrieNode(chaindb, childPath)
|
||||
} else {
|
||||
blob = rawdb.ReadStorageTrieNode(chaindb, owner, childPath)
|
||||
}
|
||||
if len(blob) > 0 {
|
||||
status := fmt.Sprintf("PRESENT (%d bytes, first=0x%02x)", len(blob), blob[0])
|
||||
if blob[0] == 0x00 && len(blob) == 17 {
|
||||
offset := binary.BigEndian.Uint64(blob[1:9])
|
||||
size := binary.BigEndian.Uint64(blob[9:17])
|
||||
status = fmt.Sprintf("EXPIRED offset=%d size=%d", offset, size)
|
||||
}
|
||||
log.Info("Child node", "path", fmt.Sprintf("%x", childPath), "status", status)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// cycleCheckScheme returns the state scheme for the database.
|
||||
// It's a helper to check what scheme is in use.
|
||||
func cycleCheckScheme(ctx *cli.Context, db ethdb.Database) string {
|
||||
return rawdb.ReadStateScheme(db)
|
||||
}
|
||||
|
|
@ -240,6 +240,8 @@ func init() {
|
|||
dumpConfigCommand,
|
||||
// see dbcmd.go
|
||||
dbCommand,
|
||||
// See archivecmd.go
|
||||
archiveCommand,
|
||||
// See cmd/utils/flags_legacy.go
|
||||
utils.ShowDeprecated,
|
||||
// See snapshot.go
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ import (
|
|||
"github.com/ethereum/go-ethereum/log"
|
||||
"github.com/ethereum/go-ethereum/p2p"
|
||||
"github.com/ethereum/go-ethereum/rpc"
|
||||
"github.com/ethereum/go-ethereum/trie/archive"
|
||||
"github.com/gofrs/flock"
|
||||
)
|
||||
|
||||
|
|
@ -85,6 +86,7 @@ func New(conf *Config) (*Node, error) {
|
|||
return nil, err
|
||||
}
|
||||
conf.DataDir = absdatadir
|
||||
archive.ArchiveDataDir = absdatadir
|
||||
}
|
||||
if conf.Logger == nil {
|
||||
conf.Logger = log.New()
|
||||
|
|
|
|||
95
trie/archive/archive.go
Normal file
95
trie/archive/archive.go
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
// Copyright 2026 go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package archive
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/ethereum/go-ethereum/rlp"
|
||||
)
|
||||
|
||||
// ResolverFn is a callback to resolve expired nodes from an archive file.
|
||||
// Given an offset and size, it returns the serialized node data from the archive.
|
||||
type ResolverFn func(offset, size uint64) ([]*Record, error)
|
||||
|
||||
// OffsetSize is the size of the file offset in bytes.
|
||||
const OffsetSize = 8
|
||||
|
||||
var (
|
||||
EmptyArchiveRecord = errors.New("empty record") // The archive contained a size-zero record.
|
||||
ErrNoResolver = errors.New("no archive resolver set for expired node") // An expired node is accessed without a resolver.
|
||||
)
|
||||
|
||||
// Record contains an archive file record. It is not the most optimal
|
||||
// structure, since any modification to it will need to be overwritten.
|
||||
type Record struct {
|
||||
Path []byte
|
||||
Value []byte
|
||||
}
|
||||
|
||||
// ArchiveDataDir is the data directory where the archive file is stored.
|
||||
var ArchiveDataDir string
|
||||
|
||||
// ArchivedNodeResolver takes a buffer containing the archive data
|
||||
// held by an expiring node (an offset and a size) and returns a
|
||||
// list of records, which is a list of serialized leaf nodes. The
|
||||
// caller knows the context (MPT, binary trie) and is responsible
|
||||
// for decoding the nodes.
|
||||
func ArchivedNodeResolver(offset, size uint64) ([]*Record, error) {
|
||||
file, err := os.Open(filepath.Join(ArchiveDataDir, "geth", "nodearchive"))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error opening archive file: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
o, err := file.Seek(int64(offset), io.SeekStart)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error seeking into archive file: %w", err)
|
||||
}
|
||||
if uint64(o) != offset {
|
||||
return nil, fmt.Errorf("invalid offset: want %d, got %d", offset, o)
|
||||
}
|
||||
|
||||
data := make([]byte, size)
|
||||
if _, err := io.ReadFull(file, data); err != nil {
|
||||
return nil, fmt.Errorf("error reading data from archive: %w", err)
|
||||
}
|
||||
|
||||
var records []*Record
|
||||
stream := rlp.NewStream(bytes.NewReader(data), uint64(len(data)))
|
||||
for len(data) > 0 {
|
||||
_, size, err := stream.Kind()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting rlp kind from archive data: %w", err)
|
||||
}
|
||||
var record Record
|
||||
err = stream.Decode(&record)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error decoding rlp record from archive data (offset=%d, size=%d): %w", offset, size, err)
|
||||
}
|
||||
records = append(records, &record)
|
||||
}
|
||||
return records, nil
|
||||
}
|
||||
92
trie/archive/writer.go
Normal file
92
trie/archive/writer.go
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
// Copyright 2026 go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package archive
|
||||
|
||||
import (
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
"github.com/ethereum/go-ethereum/rlp"
|
||||
)
|
||||
|
||||
// ArchiveWriter is an append-only writer for archive files.
|
||||
// It writes RLP-encoded records to a file and tracks the current offset.
|
||||
type ArchiveWriter struct {
|
||||
file *os.File
|
||||
offset uint64
|
||||
mu sync.Mutex
|
||||
}
|
||||
|
||||
// NewArchiveWriter creates a new archive writer that appends to the given file.
|
||||
// If the file exists, it will be opened in append mode and writing continues
|
||||
// from the current end of file. If it doesn't exist, it will be created.
|
||||
func NewArchiveWriter(path string) (*ArchiveWriter, error) {
|
||||
file, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
info, err := file.Stat()
|
||||
if err != nil {
|
||||
file.Close()
|
||||
return nil, err
|
||||
}
|
||||
return &ArchiveWriter{
|
||||
file: file,
|
||||
offset: uint64(info.Size()),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// WriteSubtree writes all records belonging to a subtree and returns
|
||||
// the starting offset and total size of the written data.
|
||||
// This is the atomic unit of archival - all records for a subtree are
|
||||
// written together and can be retrieved together using the returned
|
||||
// offset and size.
|
||||
func (w *ArchiveWriter) WriteSubtree(records []*Record) (offset uint64, size uint64, err error) {
|
||||
w.mu.Lock()
|
||||
defer w.mu.Unlock()
|
||||
|
||||
startOffset := w.offset
|
||||
for _, rec := range records {
|
||||
encoded, err := rlp.EncodeToBytes(rec)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
if _, err := w.file.Write(encoded); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
w.offset += uint64(len(encoded))
|
||||
}
|
||||
return startOffset, w.offset - startOffset, nil
|
||||
}
|
||||
|
||||
// Sync flushes the file to disk. This should be called after writing
|
||||
// a subtree and before modifying the database to ensure crash consistency.
|
||||
func (w *ArchiveWriter) Sync() error {
|
||||
return w.file.Sync()
|
||||
}
|
||||
|
||||
// Close closes the archive file.
|
||||
func (w *ArchiveWriter) Close() error {
|
||||
return w.file.Close()
|
||||
}
|
||||
|
||||
// Offset returns the current write offset in the file.
|
||||
func (w *ArchiveWriter) Offset() uint64 {
|
||||
w.mu.Lock()
|
||||
defer w.mu.Unlock()
|
||||
return w.offset
|
||||
}
|
||||
599
trie/archiver.go
Normal file
599
trie/archiver.go
Normal file
|
|
@ -0,0 +1,599 @@
|
|||
// Copyright 2026 go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package trie
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/ethereum/go-ethereum/common"
|
||||
"github.com/ethereum/go-ethereum/core/rawdb"
|
||||
"github.com/ethereum/go-ethereum/core/types"
|
||||
"github.com/ethereum/go-ethereum/ethdb"
|
||||
"github.com/ethereum/go-ethereum/log"
|
||||
"github.com/ethereum/go-ethereum/rlp"
|
||||
"github.com/ethereum/go-ethereum/trie/archive"
|
||||
"github.com/ethereum/go-ethereum/triedb/database"
|
||||
)
|
||||
|
||||
// subtreeInfo holds information about a subtree to be archived.
|
||||
// It contains all the data needed to write the subtree to an archive
|
||||
// and replace it with an expiredNode in the database.
|
||||
type subtreeInfo struct {
|
||||
path []byte // Hex-encoded path to subtree root
|
||||
owner common.Hash // Zero for account trie, account hash for storage
|
||||
height int // Height of subtree (from leaves)
|
||||
leaves []*archive.Record // All leaf records (relative path + encoded node)
|
||||
nodePaths [][]byte // Paths of all nodes to delete
|
||||
rootHash common.Hash // Hash of the original subtree root (for verification)
|
||||
}
|
||||
|
||||
// Archiver handles the archival process of trie nodes.
|
||||
// It walks the state trie, identifies subtrees at height 3,
|
||||
// archives their leaf data, and replaces them with expiredNode markers.
|
||||
//
|
||||
// The archiver uses a streaming approach: it walks the trie using a
|
||||
// NodeIterator, probes each node's height via bounded raw DB reads,
|
||||
// and archives subtrees immediately when found. This keeps memory
|
||||
// usage proportional to the iterator stack depth + the current subtree
|
||||
// being processed, rather than loading the entire trie into memory.
|
||||
type Archiver struct {
|
||||
db ethdb.Database
|
||||
triedb database.NodeDatabase
|
||||
writer *archive.ArchiveWriter
|
||||
compactionInterval uint64
|
||||
dryRun bool
|
||||
stateRoot common.Hash
|
||||
|
||||
// Progress tracking
|
||||
subtreesArchived uint64
|
||||
bytesDeleted uint64
|
||||
leavesArchived uint64
|
||||
lastCompaction uint64
|
||||
}
|
||||
|
||||
// NewArchiver creates a new archiver instance.
|
||||
//
|
||||
// Parameters:
|
||||
// - db: The underlying key-value database
|
||||
// - triedb: The trie database for reading nodes
|
||||
// - writer: Archive file writer (can be nil for dry run)
|
||||
// - compactionInterval: Run compaction after this many subtrees (0 = disable)
|
||||
// - dryRun: If true, don't modify the database
|
||||
func NewArchiver(db ethdb.Database, triedb database.NodeDatabase,
|
||||
writer *archive.ArchiveWriter, compactionInterval uint64, dryRun bool) *Archiver {
|
||||
return &Archiver{
|
||||
db: db,
|
||||
triedb: triedb,
|
||||
writer: writer,
|
||||
compactionInterval: compactionInterval,
|
||||
dryRun: dryRun,
|
||||
}
|
||||
}
|
||||
|
||||
// ProcessState archives subtrees from the given state root.
|
||||
// It processes storage tries first, then the account trie.
|
||||
func (a *Archiver) ProcessState(root common.Hash) error {
|
||||
a.stateRoot = root
|
||||
|
||||
accountTrie, err := New(StateTrieID(root), a.triedb)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open account trie: %w", err)
|
||||
}
|
||||
|
||||
log.Info("Processing storage tries")
|
||||
iter, err := accountTrie.NodeIterator(nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create account iterator: %w", err)
|
||||
}
|
||||
|
||||
kvIter := NewIterator(iter)
|
||||
for kvIter.Next() {
|
||||
// Decode the account to check for storage
|
||||
var acc types.StateAccount
|
||||
if err := rlp.DecodeBytes(kvIter.Value, &acc); err != nil {
|
||||
log.Warn("Failed to decode account", "err", err)
|
||||
continue
|
||||
}
|
||||
if acc.Root == types.EmptyRootHash {
|
||||
continue
|
||||
}
|
||||
|
||||
// Process this account's storage trie
|
||||
accountHash := common.BytesToHash(kvIter.Key)
|
||||
storageID := StorageTrieID(root, accountHash, acc.Root)
|
||||
storageTrie, err := New(storageID, a.triedb)
|
||||
if err != nil {
|
||||
log.Warn("Failed to open storage trie", "account", accountHash, "err", err)
|
||||
continue
|
||||
}
|
||||
|
||||
if err := a.processTrie(accountHash, storageTrie); err != nil {
|
||||
log.Warn("Failed to process storage trie", "account", accountHash, "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
if kvIter.Err != nil {
|
||||
return fmt.Errorf("account iteration error: %w", kvIter.Err)
|
||||
}
|
||||
|
||||
log.Info("Processing account trie", "root", root)
|
||||
if err := a.processTrie(common.Hash{}, accountTrie); err != nil {
|
||||
return fmt.Errorf("failed to process account trie: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// processTrie finds and archives all height-3 subtrees in the trie using
|
||||
// a streaming approach. It walks the trie with a NodeIterator, probes each
|
||||
// node's height via bounded raw DB reads, and archives subtrees immediately.
|
||||
//
|
||||
// Memory usage is O(iterator_stack_depth + current_subtree_size) instead of
|
||||
// O(entire_trie) as with the previous recursive approach.
|
||||
func (a *Archiver) processTrie(owner common.Hash, t *Trie) error {
|
||||
if t.root == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
iter, err := t.NodeIterator(nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create node iterator: %w", err)
|
||||
}
|
||||
|
||||
var (
|
||||
lastLog = time.Now()
|
||||
found uint64
|
||||
)
|
||||
|
||||
for iter.Next(true) {
|
||||
if iter.Leaf() {
|
||||
continue
|
||||
}
|
||||
|
||||
// Progress logging
|
||||
if time.Since(lastLog) > 30*time.Second {
|
||||
log.Info("Scanning trie for subtrees",
|
||||
"owner", owner,
|
||||
"path", common.Bytes2Hex(iter.Path()),
|
||||
"found", found,
|
||||
"archived", a.subtreesArchived)
|
||||
lastLog = time.Now()
|
||||
}
|
||||
|
||||
path := copyBytes(iter.Path())
|
||||
hash := iter.Hash()
|
||||
if hash == (common.Hash{}) {
|
||||
// Embedded node (no hash), skip — it will be part of a
|
||||
// parent subtree.
|
||||
continue
|
||||
}
|
||||
|
||||
// Probe subtree height via bounded raw DB reads.
|
||||
// This does NOT load the trie into memory — it reads blobs from
|
||||
// the DB, decodes them, computes height, and discards them.
|
||||
height := a.probeHeight(owner, path, hash, 3)
|
||||
if height != 3 {
|
||||
// Too small to archive; the iterator will visit children.
|
||||
// Too tall — descend into children to find height-3 subtrees.
|
||||
continue
|
||||
}
|
||||
|
||||
// height == 3: collect and archive this subtree immediately.
|
||||
info := a.collectSubtree(owner, path, hash)
|
||||
if info == nil {
|
||||
continue
|
||||
}
|
||||
found++
|
||||
|
||||
if err := a.archiveSubtree(info); err != nil {
|
||||
log.Warn("Failed to archive subtree", "path", common.Bytes2Hex(path), "err", err)
|
||||
continue
|
||||
}
|
||||
a.subtreesArchived++
|
||||
a.leavesArchived += uint64(len(info.leaves))
|
||||
|
||||
if err := a.maybeCompact(); err != nil {
|
||||
log.Warn("Compaction failed", "err", err)
|
||||
}
|
||||
|
||||
// Skip children — they're now archived.
|
||||
// We call Next(false) to move past the subtree without descending.
|
||||
iter.Next(false)
|
||||
}
|
||||
|
||||
if iter.Error() != nil {
|
||||
return fmt.Errorf("iterator error: %w", iter.Error())
|
||||
}
|
||||
|
||||
log.Info("Found subtrees to archive", "owner", owner, "count", found)
|
||||
return nil
|
||||
}
|
||||
|
||||
// probeHeight computes the height of a node by reading from the raw DB.
|
||||
// It stops early once height exceeds maxHeight (returns maxHeight+1).
|
||||
// The decoded nodes are not retained — they are discarded after inspection.
|
||||
//
|
||||
// Height is measured from leaves: leaves=0, their parents=1, etc.
|
||||
func (a *Archiver) probeHeight(owner common.Hash, path []byte, hash common.Hash, maxHeight int) int {
|
||||
blob := a.readNodeBlob(owner, path)
|
||||
if len(blob) == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Already expired — skip.
|
||||
if blob[0] == expiredNodeMarker {
|
||||
return -1
|
||||
}
|
||||
|
||||
n, err := decodeNodeUnsafe(hash[:], blob)
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
return a.nodeHeight(n, path, owner, maxHeight)
|
||||
}
|
||||
|
||||
// nodeHeight computes the height of a decoded node, bounded by maxHeight.
|
||||
// Returns maxHeight+1 early if the subtree is taller than maxHeight.
|
||||
func (a *Archiver) nodeHeight(n node, path []byte, owner common.Hash, maxHeight int) int {
|
||||
switch n := n.(type) {
|
||||
case nil:
|
||||
return 0
|
||||
|
||||
case valueNode:
|
||||
return 0
|
||||
|
||||
case *shortNode:
|
||||
childPath := append(append([]byte{}, path...), n.Key...)
|
||||
switch child := n.Val.(type) {
|
||||
case valueNode:
|
||||
return 1 // shortNode → leaf
|
||||
case hashNode:
|
||||
if maxHeight <= 1 {
|
||||
return maxHeight + 1
|
||||
}
|
||||
childHeight := a.probeHeight(owner, childPath, common.BytesToHash(child), maxHeight-1)
|
||||
if childHeight < 0 {
|
||||
return -1 // expired child
|
||||
}
|
||||
return childHeight + 1
|
||||
default:
|
||||
// Inline node
|
||||
childHeight := a.nodeHeight(child, childPath, owner, maxHeight-1)
|
||||
if childHeight < 0 {
|
||||
return -1
|
||||
}
|
||||
return childHeight + 1
|
||||
}
|
||||
|
||||
case *fullNode:
|
||||
maxH := 0
|
||||
for i, child := range n.Children[:16] {
|
||||
if child == nil {
|
||||
continue
|
||||
}
|
||||
childPath := append(append([]byte{}, path...), byte(i))
|
||||
var childHeight int
|
||||
switch c := child.(type) {
|
||||
case valueNode:
|
||||
childHeight = 0
|
||||
case hashNode:
|
||||
if maxH+1 > maxHeight {
|
||||
return maxHeight + 1
|
||||
}
|
||||
childHeight = a.probeHeight(owner, childPath, common.BytesToHash(c), maxHeight-1)
|
||||
default:
|
||||
childHeight = a.nodeHeight(c, childPath, owner, maxHeight-1)
|
||||
}
|
||||
if childHeight < 0 {
|
||||
continue // expired child, skip
|
||||
}
|
||||
h := childHeight + 1
|
||||
if h > maxH {
|
||||
maxH = h
|
||||
}
|
||||
if maxH > maxHeight {
|
||||
return maxHeight + 1
|
||||
}
|
||||
}
|
||||
return maxH
|
||||
|
||||
case hashNode:
|
||||
return a.probeHeight(owner, path, common.BytesToHash(n), maxHeight)
|
||||
|
||||
case *expiredNode:
|
||||
return -1
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// collectSubtree reads a height-3 subtree from the raw DB and collects its
|
||||
// leaves and node paths for archival. The subtree is bounded (height ≤ 3),
|
||||
// so memory usage is limited.
|
||||
func (a *Archiver) collectSubtree(owner common.Hash, path []byte, hash common.Hash) *subtreeInfo {
|
||||
blob := a.readNodeBlob(owner, path)
|
||||
if len(blob) == 0 {
|
||||
return nil
|
||||
}
|
||||
if blob[0] == expiredNodeMarker {
|
||||
return nil
|
||||
}
|
||||
|
||||
n, err := decodeNodeUnsafe(hash[:], blob)
|
||||
if err != nil {
|
||||
log.Warn("Failed to decode node for collection", "path", common.Bytes2Hex(path), "err", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
info := &subtreeInfo{
|
||||
path: copyBytes(path),
|
||||
owner: owner,
|
||||
rootHash: hash,
|
||||
}
|
||||
|
||||
leaves, nodePaths, height, err := a.collectNodeLeaves(n, path, nil, owner)
|
||||
if err != nil {
|
||||
log.Warn("Failed to collect subtree leaves", "path", common.Bytes2Hex(path), "err", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
info.height = height
|
||||
info.leaves = leaves
|
||||
info.nodePaths = append([][]byte{copyBytes(path)}, nodePaths...)
|
||||
return info
|
||||
}
|
||||
|
||||
// collectNodeLeaves recursively collects all leaves and node paths in a
|
||||
// bounded subtree. relPath is the path relative to the subtree root.
|
||||
// Returns (leaves, nodePaths, height, error).
|
||||
func (a *Archiver) collectNodeLeaves(n node, absPath, relPath []byte, owner common.Hash) ([]*archive.Record, [][]byte, int, error) {
|
||||
switch n := n.(type) {
|
||||
case nil:
|
||||
return nil, nil, 0, nil
|
||||
|
||||
case valueNode:
|
||||
return []*archive.Record{{
|
||||
Path: copyBytes(relPath),
|
||||
Value: []byte(n),
|
||||
}}, nil, 0, nil
|
||||
|
||||
case *shortNode:
|
||||
childAbsPath := append(append([]byte{}, absPath...), n.Key...)
|
||||
var childNode node
|
||||
switch c := n.Val.(type) {
|
||||
case hashNode:
|
||||
resolved, err := a.resolveRawNode(owner, childAbsPath, common.BytesToHash(c))
|
||||
if err != nil {
|
||||
return nil, nil, 0, fmt.Errorf("resolve shortNode child at %s: %w", common.Bytes2Hex(childAbsPath), err)
|
||||
}
|
||||
childNode = resolved
|
||||
default:
|
||||
childNode = c
|
||||
}
|
||||
|
||||
// Pass nil relPath to child — we prepend the key ourselves
|
||||
leaves, nodePaths, height, err := a.collectNodeLeaves(childNode, childAbsPath, nil, owner)
|
||||
if err != nil {
|
||||
return nil, nil, 0, err
|
||||
}
|
||||
|
||||
// Prepend [relPath + extension key] to leaf relative paths
|
||||
prefix := append(append([]byte{}, relPath...), n.Key...)
|
||||
for _, leaf := range leaves {
|
||||
leaf.Path = append(append([]byte{}, prefix...), leaf.Path...)
|
||||
}
|
||||
|
||||
return leaves, append([][]byte{copyBytes(absPath)}, nodePaths...), height + 1, nil
|
||||
|
||||
case *fullNode:
|
||||
var (
|
||||
allLeaves []*archive.Record
|
||||
allPaths [][]byte
|
||||
maxHeight int
|
||||
)
|
||||
for i, child := range n.Children[:16] {
|
||||
if child == nil {
|
||||
continue
|
||||
}
|
||||
childAbsPath := append(append([]byte{}, absPath...), byte(i))
|
||||
|
||||
var childNode node
|
||||
switch c := child.(type) {
|
||||
case hashNode:
|
||||
resolved, err := a.resolveRawNode(owner, childAbsPath, common.BytesToHash(c))
|
||||
if err != nil {
|
||||
return nil, nil, 0, fmt.Errorf("resolve fullNode child[%x] at %s: %w", i, common.Bytes2Hex(childAbsPath), err)
|
||||
}
|
||||
childNode = resolved
|
||||
default:
|
||||
childNode = c
|
||||
}
|
||||
|
||||
// Pass nil relPath to child — we prepend the index ourselves
|
||||
leaves, nodePaths, height, err := a.collectNodeLeaves(childNode, childAbsPath, nil, owner)
|
||||
if err != nil {
|
||||
return nil, nil, 0, err
|
||||
}
|
||||
|
||||
// Prepend [relPath + branch index] to leaf relative paths
|
||||
prefix := append(append([]byte{}, relPath...), byte(i))
|
||||
for _, leaf := range leaves {
|
||||
leaf.Path = append(append([]byte{}, prefix...), leaf.Path...)
|
||||
}
|
||||
|
||||
allLeaves = append(allLeaves, leaves...)
|
||||
allPaths = append(allPaths, nodePaths...)
|
||||
h := height + 1
|
||||
if h > maxHeight {
|
||||
maxHeight = h
|
||||
}
|
||||
}
|
||||
return allLeaves, allPaths, maxHeight, nil
|
||||
|
||||
case hashNode:
|
||||
resolved, err := a.resolveRawNode(owner, absPath, common.BytesToHash(n))
|
||||
if err != nil {
|
||||
return nil, nil, 0, err
|
||||
}
|
||||
return a.collectNodeLeaves(resolved, absPath, relPath, owner)
|
||||
|
||||
case *expiredNode:
|
||||
return nil, nil, 0, nil
|
||||
}
|
||||
return nil, nil, 0, nil
|
||||
}
|
||||
|
||||
// readNodeBlob reads a trie node blob directly from the raw key-value
|
||||
// database, bypassing pathdb layers.
|
||||
func (a *Archiver) readNodeBlob(owner common.Hash, path []byte) []byte {
|
||||
if owner == (common.Hash{}) {
|
||||
return rawdb.ReadAccountTrieNode(a.db, path)
|
||||
}
|
||||
return rawdb.ReadStorageTrieNode(a.db, owner, path)
|
||||
}
|
||||
|
||||
// resolveRawNode reads and decodes a trie node directly from the raw DB.
|
||||
// Unlike resolveNode, this does NOT use the trie database (no caching,
|
||||
// no diff layers). The decoded node is ephemeral and will be GC'd after use.
|
||||
func (a *Archiver) resolveRawNode(owner common.Hash, path []byte, hash common.Hash) (node, error) {
|
||||
blob := a.readNodeBlob(owner, path)
|
||||
if len(blob) == 0 {
|
||||
return nil, fmt.Errorf("node not found: owner=%s path=%s", owner, common.Bytes2Hex(path))
|
||||
}
|
||||
if blob[0] == expiredNodeMarker {
|
||||
return &expiredNode{}, nil
|
||||
}
|
||||
return decodeNodeUnsafe(hash[:], blob)
|
||||
}
|
||||
|
||||
// archiveSubtree writes leaves to archive and replaces subtree with expiredNode.
|
||||
func (a *Archiver) archiveSubtree(info *subtreeInfo) error {
|
||||
if a.dryRun {
|
||||
log.Info("Would archive subtree",
|
||||
"path", common.Bytes2Hex(info.path),
|
||||
"owner", info.owner,
|
||||
"height", info.height,
|
||||
"leaves", len(info.leaves),
|
||||
"nodes", len(info.nodePaths))
|
||||
return nil
|
||||
}
|
||||
|
||||
// 1. Write to archive file
|
||||
offset, size, err := a.writer.WriteSubtree(info.leaves)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write subtree to archive: %w", err)
|
||||
}
|
||||
|
||||
// 2. Sync to ensure durability before modifying DB
|
||||
if err := a.writer.Sync(); err != nil {
|
||||
return fmt.Errorf("failed to sync archive: %w", err)
|
||||
}
|
||||
|
||||
// 3. Verify archive round-trip: reconstruct trie from records and
|
||||
// check that the hash matches the original subtree root. This
|
||||
// catches any data corruption before we delete the original nodes.
|
||||
if info.rootHash != (common.Hash{}) {
|
||||
reconstructed, err := archiveRecordsToNode(info.leaves)
|
||||
if err != nil {
|
||||
return fmt.Errorf("archive verification failed: cannot reconstruct trie from records: %w", err)
|
||||
}
|
||||
h := newHasher(false)
|
||||
gotHash := common.BytesToHash(h.hash(reconstructed, true))
|
||||
returnHasherToPool(h)
|
||||
if gotHash != info.rootHash {
|
||||
return fmt.Errorf("archive verification failed: hash mismatch at path %s owner %s: got %s want %s (leaves=%d offset=%d size=%d)",
|
||||
common.Bytes2Hex(info.path), info.owner, gotHash, info.rootHash,
|
||||
len(info.leaves), offset, size)
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Batch database operations
|
||||
batch := a.db.NewBatch()
|
||||
|
||||
// Delete all nodes in subtree (except the root which we'll overwrite)
|
||||
for _, nodePath := range info.nodePaths[1:] { // Skip first (root)
|
||||
if info.owner == (common.Hash{}) {
|
||||
rawdb.DeleteAccountTrieNode(batch, nodePath)
|
||||
} else {
|
||||
rawdb.DeleteStorageTrieNode(batch, info.owner, nodePath)
|
||||
}
|
||||
a.bytesDeleted += uint64(len(nodePath))
|
||||
}
|
||||
|
||||
// Write expiredNode at subtree root
|
||||
expiredBlob := encodeExpiredNodeBlob(offset, size)
|
||||
if info.owner == (common.Hash{}) {
|
||||
rawdb.WriteAccountTrieNode(batch, info.path, expiredBlob)
|
||||
} else {
|
||||
rawdb.WriteStorageTrieNode(batch, info.owner, info.path, expiredBlob)
|
||||
}
|
||||
|
||||
if err := batch.Write(); err != nil {
|
||||
return fmt.Errorf("failed to write batch: %w", err)
|
||||
}
|
||||
|
||||
log.Debug("Archived subtree",
|
||||
"path", common.Bytes2Hex(info.path),
|
||||
"owner", info.owner,
|
||||
"leaves", len(info.leaves),
|
||||
"offset", offset,
|
||||
"size", size)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// maybeCompact runs database compaction if the threshold is reached.
|
||||
func (a *Archiver) maybeCompact() error {
|
||||
if a.compactionInterval == 0 {
|
||||
return nil
|
||||
}
|
||||
if a.subtreesArchived-a.lastCompaction >= a.compactionInterval {
|
||||
log.Info("Running database compaction", "subtrees", a.subtreesArchived)
|
||||
if err := a.db.Compact(nil, nil); err != nil {
|
||||
return err
|
||||
}
|
||||
a.lastCompaction = a.subtreesArchived
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// encodeExpiredNodeBlob creates the raw bytes for an expiredNode.
|
||||
// Format: 1-byte marker (0x00) + 8-byte offset + 8-byte size = 17 bytes
|
||||
func encodeExpiredNodeBlob(offset, size uint64) []byte {
|
||||
buf := make([]byte, 1+2*archive.OffsetSize) // 17 bytes
|
||||
buf[0] = expiredNodeMarker // 0x00
|
||||
binary.BigEndian.PutUint64(buf[1:], offset)
|
||||
binary.BigEndian.PutUint64(buf[1+archive.OffsetSize:], size)
|
||||
return buf
|
||||
}
|
||||
|
||||
// Stats returns archival statistics.
|
||||
func (a *Archiver) Stats() (subtrees, leaves, bytesDeleted uint64) {
|
||||
return a.subtreesArchived, a.leavesArchived, a.bytesDeleted
|
||||
}
|
||||
|
||||
// copyBytes returns a copy of the given byte slice.
|
||||
func copyBytes(b []byte) []byte {
|
||||
if b == nil {
|
||||
return nil
|
||||
}
|
||||
c := make([]byte, len(b))
|
||||
copy(c, b)
|
||||
return c
|
||||
}
|
||||
|
|
@ -79,6 +79,8 @@ func (c *committer) commit(path []byte, n node, parallel bool) node {
|
|||
return cn
|
||||
case hashNode:
|
||||
return cn
|
||||
case *expiredNode:
|
||||
return cn
|
||||
default:
|
||||
// nil, valuenode shouldn't be committed
|
||||
panic(fmt.Sprintf("%T: invalid node: %v", n, n))
|
||||
|
|
|
|||
262
trie/expired_node.go
Normal file
262
trie/expired_node.go
Normal file
|
|
@ -0,0 +1,262 @@
|
|||
// Copyright 2026 go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package trie
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/ethereum/go-ethereum/log"
|
||||
"github.com/ethereum/go-ethereum/rlp"
|
||||
"github.com/ethereum/go-ethereum/trie/archive"
|
||||
)
|
||||
|
||||
// expiredNodeMarker is a special marker byte to identify expired nodes.
|
||||
// Using 0x00 as a marker since valid MPT nodes are always RLP lists (starting with 0xc0+).
|
||||
const expiredNodeMarker = 0x00
|
||||
|
||||
// expiredNode represents a node whose data has been archived.
|
||||
// It stores the file offset and size of the archived data.
|
||||
type expiredNode struct {
|
||||
offset uint64
|
||||
size uint64
|
||||
cachedHash hashNode
|
||||
archiveResolver archive.ResolverFn
|
||||
}
|
||||
|
||||
func (n *expiredNode) cache() (hashNode, bool) {
|
||||
return n.cachedHash, n.cachedHash == nil
|
||||
}
|
||||
|
||||
func (n *expiredNode) encode(w rlp.EncoderBuffer) {
|
||||
var buf [1 + 2*archive.OffsetSize]byte
|
||||
buf[0] = expiredNodeMarker
|
||||
binary.BigEndian.PutUint64(buf[1:], n.offset)
|
||||
binary.BigEndian.PutUint64(buf[1+archive.OffsetSize:], n.size)
|
||||
w.Write(buf[:])
|
||||
}
|
||||
|
||||
func (n *expiredNode) fstring(ind string) string {
|
||||
return fmt.Sprintf("<expired: offset=%d, size=%d> ", n.offset, n.size)
|
||||
}
|
||||
|
||||
// Offset returns the archive file offset for this expired node.
|
||||
func (n *expiredNode) Offset() uint64 {
|
||||
return n.offset
|
||||
}
|
||||
|
||||
// SetArchiveResolver sets the resolver function for this expired node.
|
||||
func (n *expiredNode) SetArchiveResolver(resolver archive.ResolverFn) {
|
||||
n.archiveResolver = resolver
|
||||
}
|
||||
|
||||
// resolveExpiredNodeData resolves an expired node from the archive, verifies
|
||||
// the reconstructed subtree hash, and stamps the cached hash onto the root.
|
||||
// Returns an error if the archive data is corrupted (hash mismatch).
|
||||
func resolveExpiredNodeData(n *expiredNode) (node, error) {
|
||||
start := time.Now()
|
||||
records, err := archive.ArchivedNodeResolver(n.offset, n.size)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to resolve expired node: %w", err)
|
||||
}
|
||||
resolved, err := archiveRecordsToNode(records)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to rebuild expired node from archive: %w", err)
|
||||
}
|
||||
depth := subtreeDepth(resolved)
|
||||
log.Debug("Resurrected expired node from archive",
|
||||
"offset", n.offset, "archiveBytes", n.size,
|
||||
"records", len(records), "depth", depth,
|
||||
"elapsed", time.Since(start))
|
||||
// Verify hash integrity: if the original hash is known, check that the
|
||||
// reconstructed subtree produces the same hash. A mismatch means the
|
||||
// archive is corrupted (e.g. missing leaves due to unresolvable hashNodes
|
||||
// during archival) and any data from it is unreliable.
|
||||
if n.cachedHash != nil {
|
||||
h := newHasher(false)
|
||||
gotHash := h.hash(resolved, true)
|
||||
returnHasherToPool(h)
|
||||
if !bytes.Equal(gotHash, n.cachedHash) {
|
||||
return nil, fmt.Errorf("expired node hash mismatch at offset=%d size=%d: archive data is corrupted (expected %x got %x, %d records)",
|
||||
n.offset, n.size, []byte(n.cachedHash), gotHash, len(records))
|
||||
}
|
||||
// Stamp the original hash onto the resolved subtree root so the
|
||||
// hasher returns it directly instead of re-computing.
|
||||
switch nn := resolved.(type) {
|
||||
case *fullNode:
|
||||
nn.flags.hash = n.cachedHash
|
||||
case *shortNode:
|
||||
nn.flags.hash = n.cachedHash
|
||||
}
|
||||
}
|
||||
// Mark the entire resolved subtree as dirty. This is critical for
|
||||
// correctness with pathdb's diff layer model: when a trie with expired
|
||||
// nodes is modified and committed, the committer only captures dirty
|
||||
// nodes into the NodeSet (which becomes the diff layer). Without this
|
||||
// marking, resolved-but-unmodified sibling nodes within the subtree
|
||||
// would exist nowhere — not in any diff layer (they're clean) and not
|
||||
// in the raw DB (the archiver deleted them). Subsequent trie accesses
|
||||
// from higher diff layers would fall through to the disk layer, find
|
||||
// nothing, and produce MissingNodeError.
|
||||
//
|
||||
// For read-only tries (only get operations, no commit), this dirty
|
||||
// marking is harmless — the nodes are discarded when the trie is GC'd.
|
||||
markSubtreeDirty(resolved)
|
||||
return resolved, nil
|
||||
}
|
||||
|
||||
// subtreeDepth returns the maximum depth of a trie subtree.
|
||||
func subtreeDepth(n node) int {
|
||||
switch n := n.(type) {
|
||||
case *fullNode:
|
||||
max := 0
|
||||
for _, child := range &n.Children {
|
||||
if child != nil {
|
||||
if d := subtreeDepth(child); d > max {
|
||||
max = d
|
||||
}
|
||||
}
|
||||
}
|
||||
return 1 + max
|
||||
case *shortNode:
|
||||
return 1 + subtreeDepth(n.Val)
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
// markSubtreeDirty recursively marks all fullNode and shortNode in the
|
||||
// subtree as dirty, preserving any cached hashes. This ensures the
|
||||
// committer will capture them in the NodeSet during trie commit.
|
||||
func markSubtreeDirty(n node) {
|
||||
switch n := n.(type) {
|
||||
case *fullNode:
|
||||
n.flags.dirty = true
|
||||
for _, child := range n.Children[:16] {
|
||||
if child != nil {
|
||||
markSubtreeDirty(child)
|
||||
}
|
||||
}
|
||||
case *shortNode:
|
||||
n.flags.dirty = true
|
||||
markSubtreeDirty(n.Val)
|
||||
}
|
||||
// valueNode, hashNode, nil: no flags to mark
|
||||
}
|
||||
|
||||
func archiveRecordsToNode(records []*archive.Record) (node, error) {
|
||||
if len(records) == 0 {
|
||||
return nil, archive.EmptyArchiveRecord
|
||||
}
|
||||
|
||||
// Build the trie incrementally from nil to produce the canonical
|
||||
// MPT structure. Starting with a fullNode would be wrong when the
|
||||
// original subtree root was a shortNode (shared prefix).
|
||||
var root node
|
||||
for i, record := range records {
|
||||
if err := validateRecordPath(record.Path); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
key, err := normalizeRecordKey(record.Path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(key) < 1 {
|
||||
return nil, fmt.Errorf("empty key in record #%d", i)
|
||||
}
|
||||
root, err = insertTrieNode(root, key, valueNode(record.Value))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return root, nil
|
||||
}
|
||||
|
||||
func validateRecordPath(path []byte) error {
|
||||
for i, b := range path {
|
||||
if b > 16 {
|
||||
return fmt.Errorf("invalid nibble in record path: %d", b)
|
||||
}
|
||||
if b == 16 && i != len(path)-1 {
|
||||
return fmt.Errorf("terminator nibble in middle of record path")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// normalizeRecordKey ensures the record path is a hex-nibble key suitable for
|
||||
// leaf insertion by guaranteeing a single terminator nibble and preserving any
|
||||
// already-terminated path. Empty paths are normalized to a sole terminator.
|
||||
func normalizeRecordKey(path []byte) ([]byte, error) {
|
||||
if len(path) == 0 {
|
||||
return []byte{16}, nil
|
||||
}
|
||||
if hasTerm(path) {
|
||||
return path, nil
|
||||
}
|
||||
key := append([]byte{}, path...)
|
||||
key = append(key, 16)
|
||||
return key, nil
|
||||
}
|
||||
|
||||
func insertTrieNode(n node, key []byte, value node) (node, error) {
|
||||
if len(key) == 0 {
|
||||
return value, nil
|
||||
}
|
||||
switch n := n.(type) {
|
||||
case *shortNode:
|
||||
matchlen := prefixLen(key, n.Key)
|
||||
if matchlen == len(n.Key) {
|
||||
nn, err := insertTrieNode(n.Val, key[matchlen:], value)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &shortNode{Key: n.Key, Val: nn}, nil
|
||||
}
|
||||
branch := &fullNode{}
|
||||
var err error
|
||||
branch.Children[n.Key[matchlen]], err = insertTrieNode(nil, n.Key[matchlen+1:], n.Val)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
branch.Children[key[matchlen]], err = insertTrieNode(nil, key[matchlen+1:], value)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if matchlen == 0 {
|
||||
return branch, nil
|
||||
}
|
||||
return &shortNode{Key: key[:matchlen], Val: branch}, nil
|
||||
|
||||
case *fullNode:
|
||||
child, err := insertTrieNode(n.Children[key[0]], key[1:], value)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
n.Children[key[0]] = child
|
||||
return n, nil
|
||||
|
||||
case nil:
|
||||
return &shortNode{Key: key, Val: value}, nil
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid node type in trie insert: %T", n)
|
||||
}
|
||||
}
|
||||
601
trie/expired_node_test.go
Normal file
601
trie/expired_node_test.go
Normal file
|
|
@ -0,0 +1,601 @@
|
|||
// Copyright 2026 go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package trie
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/ethereum/go-ethereum/rlp"
|
||||
"github.com/ethereum/go-ethereum/trie/archive"
|
||||
)
|
||||
|
||||
// setupTestArchive creates a temporary archive directory with an archive file
|
||||
// containing the given records, and configures archive.ArchiveDataDir to point
|
||||
// to it. It returns the offset and size of the written data, and a cleanup function.
|
||||
func setupTestArchive(t *testing.T, records []*archive.Record) (offset, size uint64, cleanup func()) {
|
||||
t.Helper()
|
||||
tmpDir := t.TempDir()
|
||||
gethDir := filepath.Join(tmpDir, "geth")
|
||||
if err := os.MkdirAll(gethDir, 0755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
writer, err := archive.NewArchiveWriter(filepath.Join(gethDir, "nodearchive"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
offset, size, err = writer.WriteSubtree(records)
|
||||
if err != nil {
|
||||
writer.Close()
|
||||
t.Fatal(err)
|
||||
}
|
||||
writer.Close()
|
||||
|
||||
oldDir := archive.ArchiveDataDir
|
||||
archive.ArchiveDataDir = tmpDir
|
||||
|
||||
return offset, size, func() {
|
||||
archive.ArchiveDataDir = oldDir
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpiredNodeEncodeDecode(t *testing.T) {
|
||||
testCases := []struct {
|
||||
offset uint64
|
||||
size uint64
|
||||
}{
|
||||
{0, 0},
|
||||
{1, 100},
|
||||
{255, 1024},
|
||||
{256, 4096},
|
||||
{1 << 16, 1 << 20},
|
||||
{1 << 32, 1 << 32},
|
||||
{1<<64 - 1, 1<<64 - 1},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
original := &expiredNode{offset: tc.offset, size: tc.size}
|
||||
|
||||
w := rlp.NewEncoderBuffer(nil)
|
||||
original.encode(w)
|
||||
encoded := w.ToBytes()
|
||||
w.Flush()
|
||||
|
||||
decoded, err := decodeNodeUnsafe(nil, encoded)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to decode expired node with offset %d, size %d: %v", tc.offset, tc.size, err)
|
||||
}
|
||||
|
||||
expNode, ok := decoded.(*expiredNode)
|
||||
if !ok {
|
||||
t.Fatalf("decoded node is not an expired node, got %T", decoded)
|
||||
}
|
||||
|
||||
if expNode.offset != original.offset {
|
||||
t.Errorf("offset mismatch: got %d, want %d", expNode.offset, original.offset)
|
||||
}
|
||||
if expNode.size != original.size {
|
||||
t.Errorf("size mismatch: got %d, want %d", expNode.size, original.size)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpiredNodeEncodedFormat(t *testing.T) {
|
||||
node := &expiredNode{offset: 0x0102030405060708, size: 0x1112131415161718}
|
||||
|
||||
w := rlp.NewEncoderBuffer(nil)
|
||||
node.encode(w)
|
||||
encoded := w.ToBytes()
|
||||
w.Flush()
|
||||
|
||||
expected := []byte{
|
||||
0x00,
|
||||
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
|
||||
0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
|
||||
}
|
||||
if !bytes.Equal(encoded, expected) {
|
||||
t.Errorf("encoded format mismatch: got %x, want %x", encoded, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpiredNodeFstring(t *testing.T) {
|
||||
node := &expiredNode{offset: 12345, size: 6789}
|
||||
s := node.fstring("")
|
||||
if s != "<expired: offset=12345, size=6789> " {
|
||||
t.Errorf("fstring mismatch: got %q", s)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpiredNodeCache(t *testing.T) {
|
||||
node := &expiredNode{offset: 100}
|
||||
hash, dirty := node.cache()
|
||||
if hash != nil {
|
||||
t.Error("expected nil hash from expired node cache")
|
||||
}
|
||||
if !dirty {
|
||||
t.Error("expected dirty=true from expired node cache")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpiredNodeInvalidLength(t *testing.T) {
|
||||
invalidCases := [][]byte{
|
||||
{0x00},
|
||||
{0x00, 0x01},
|
||||
{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08},
|
||||
{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f},
|
||||
{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11},
|
||||
}
|
||||
|
||||
for _, buf := range invalidCases {
|
||||
_, err := decodeNodeUnsafe(nil, buf)
|
||||
if err == nil {
|
||||
t.Errorf("expected error for buffer length %d, got nil", len(buf))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpiredNodeNoArchiveFile(t *testing.T) {
|
||||
// When no archive file exists, Get should return an error
|
||||
tmpDir := t.TempDir()
|
||||
gethDir := filepath.Join(tmpDir, "geth")
|
||||
if err := os.MkdirAll(gethDir, 0755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
oldDir := archive.ArchiveDataDir
|
||||
archive.ArchiveDataDir = tmpDir
|
||||
defer func() { archive.ArchiveDataDir = oldDir }()
|
||||
|
||||
tr := NewEmpty(nil)
|
||||
tr.root = &expiredNode{offset: 100, size: 50}
|
||||
|
||||
_, err := tr.Get([]byte("key"))
|
||||
if err == nil {
|
||||
t.Error("expected error when archive file doesn't exist")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpiredNodeWithResolver(t *testing.T) {
|
||||
records := []*archive.Record{
|
||||
{Path: []byte{0x01, 0x02, 16}, Value: []byte("testvalue")},
|
||||
}
|
||||
offset, size, cleanup := setupTestArchive(t, records)
|
||||
defer cleanup()
|
||||
|
||||
tr := NewEmpty(nil)
|
||||
tr.root = &expiredNode{offset: offset, size: size}
|
||||
|
||||
val, err := tr.Get([]byte{0x12})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if string(val) != "testvalue" {
|
||||
t.Errorf("value mismatch: got %q, want %q", val, "testvalue")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpiredNodeCopy(t *testing.T) {
|
||||
original := &expiredNode{
|
||||
offset: 12345,
|
||||
size: 6789,
|
||||
archiveResolver: archive.ArchivedNodeResolver,
|
||||
}
|
||||
|
||||
copied := copyNode(original)
|
||||
copiedExp, ok := copied.(*expiredNode)
|
||||
if !ok {
|
||||
t.Fatalf("copied node is not an expired node, got %T", copied)
|
||||
}
|
||||
|
||||
if copiedExp.offset != original.offset {
|
||||
t.Errorf("offset mismatch: got %d, want %d", copiedExp.offset, original.offset)
|
||||
}
|
||||
|
||||
if copiedExp.size != original.size {
|
||||
t.Errorf("size mismatch: got %d, want %d", copiedExp.size, original.size)
|
||||
}
|
||||
|
||||
if copiedExp.archiveResolver == nil {
|
||||
t.Error("archive resolver was not copied")
|
||||
}
|
||||
}
|
||||
|
||||
func TestArchiveRecordsToNodeEmpty(t *testing.T) {
|
||||
_, err := archiveRecordsToNode([]*archive.Record{})
|
||||
if !errors.Is(err, archive.EmptyArchiveRecord) {
|
||||
t.Errorf("expected EmptyArchiveRecord error, got %v", err)
|
||||
}
|
||||
|
||||
_, err = archiveRecordsToNode(nil)
|
||||
if !errors.Is(err, archive.EmptyArchiveRecord) {
|
||||
t.Errorf("expected EmptyArchiveRecord error for nil slice, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestArchiveRecordsToNodeMultiple(t *testing.T) {
|
||||
records := []*archive.Record{
|
||||
{Path: []byte{0x01, 16}, Value: []byte("value1")},
|
||||
{Path: []byte{0x02, 16}, Value: []byte("value2")},
|
||||
}
|
||||
|
||||
node, err := archiveRecordsToNode(records)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
fn, ok := node.(*fullNode)
|
||||
if !ok {
|
||||
t.Fatalf("expected fullNode, got %T", node)
|
||||
}
|
||||
|
||||
if fn.Children[0x01] == nil {
|
||||
t.Error("expected child at index 0x01")
|
||||
}
|
||||
if fn.Children[0x02] == nil {
|
||||
t.Error("expected child at index 0x02")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpiredNodeGetMultipleRecords(t *testing.T) {
|
||||
records := []*archive.Record{
|
||||
{Path: []byte{0x01, 0x02, 16}, Value: []byte("value1")},
|
||||
{Path: []byte{0x04, 0x05, 16}, Value: []byte("value2")},
|
||||
}
|
||||
offset, size, cleanup := setupTestArchive(t, records)
|
||||
defer cleanup()
|
||||
|
||||
tr := NewEmpty(nil)
|
||||
tr.root = &expiredNode{offset: offset, size: size}
|
||||
|
||||
val, err := tr.Get([]byte{0x12})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if string(val) != "value1" {
|
||||
t.Errorf("value mismatch: got %q, want %q", val, "value1")
|
||||
}
|
||||
|
||||
tr2 := NewEmpty(nil)
|
||||
tr2.root = &expiredNode{offset: offset, size: size}
|
||||
|
||||
val2, err := tr2.Get([]byte{0x45})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if string(val2) != "value2" {
|
||||
t.Errorf("value mismatch: got %q, want %q", val2, "value2")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpiredNodeGetKeyNotFound(t *testing.T) {
|
||||
records := []*archive.Record{
|
||||
{Path: []byte{0x01, 0x02, 16}, Value: []byte("value1")},
|
||||
}
|
||||
offset, size, cleanup := setupTestArchive(t, records)
|
||||
defer cleanup()
|
||||
|
||||
tr := NewEmpty(nil)
|
||||
tr.root = &expiredNode{offset: offset, size: size}
|
||||
|
||||
val, err := tr.Get([]byte{0xff, 0xff})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if val != nil {
|
||||
t.Errorf("expected nil value for non-existent key, got %q", val)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpiredNodeGetPathMismatch(t *testing.T) {
|
||||
records := []*archive.Record{
|
||||
{Path: []byte{0x01, 0x02, 16}, Value: []byte("testvalue")},
|
||||
}
|
||||
offset, size, cleanup := setupTestArchive(t, records)
|
||||
defer cleanup()
|
||||
|
||||
tr := NewEmpty(nil)
|
||||
tr.root = &expiredNode{offset: offset, size: size}
|
||||
|
||||
val, err := tr.Get([]byte{0x19})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if val != nil {
|
||||
t.Errorf("expected nil value when leaf key doesn't match, got %q", val)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpiredNodeInsert(t *testing.T) {
|
||||
records := []*archive.Record{
|
||||
{Path: []byte{0x01, 0x02, 16}, Value: []byte("existing")},
|
||||
}
|
||||
offset, size, cleanup := setupTestArchive(t, records)
|
||||
defer cleanup()
|
||||
|
||||
tr := NewEmpty(nil)
|
||||
tr.root = &expiredNode{offset: offset, size: size}
|
||||
|
||||
err := tr.Update([]byte{0x45}, []byte("newvalue"))
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error on insert: %v", err)
|
||||
}
|
||||
|
||||
val, err := tr.Get([]byte{0x45})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error on get: %v", err)
|
||||
}
|
||||
if string(val) != "newvalue" {
|
||||
t.Errorf("value mismatch: got %q, want %q", val, "newvalue")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpiredNodeUpdate(t *testing.T) {
|
||||
records := []*archive.Record{
|
||||
{Path: []byte{0x01, 0x02, 16}, Value: []byte("oldvalue")},
|
||||
}
|
||||
offset, size, cleanup := setupTestArchive(t, records)
|
||||
defer cleanup()
|
||||
|
||||
tr := NewEmpty(nil)
|
||||
tr.root = &expiredNode{offset: offset, size: size}
|
||||
|
||||
err := tr.Update([]byte{0x12}, []byte("newvalue"))
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error on update: %v", err)
|
||||
}
|
||||
|
||||
val, err := tr.Get([]byte{0x12})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error on get: %v", err)
|
||||
}
|
||||
if string(val) != "newvalue" {
|
||||
t.Errorf("value mismatch: got %q, want %q", val, "newvalue")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpiredNodeDelete(t *testing.T) {
|
||||
records := []*archive.Record{
|
||||
{Path: []byte{0x01, 0x02, 16}, Value: []byte("value1")},
|
||||
{Path: []byte{0x04, 0x05, 16}, Value: []byte("value2")},
|
||||
}
|
||||
offset, size, cleanup := setupTestArchive(t, records)
|
||||
defer cleanup()
|
||||
|
||||
tr := NewEmpty(nil)
|
||||
tr.root = &expiredNode{offset: offset, size: size}
|
||||
|
||||
err := tr.Delete([]byte{0x12})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error on delete: %v", err)
|
||||
}
|
||||
|
||||
val, err := tr.Get([]byte{0x12})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error on get after delete: %v", err)
|
||||
}
|
||||
if val != nil {
|
||||
t.Errorf("expected nil after delete, got %q", val)
|
||||
}
|
||||
|
||||
val2, err := tr.Get([]byte{0x45})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error getting other key: %v", err)
|
||||
}
|
||||
if string(val2) != "value2" {
|
||||
t.Errorf("other value should still exist: got %q, want %q", val2, "value2")
|
||||
}
|
||||
}
|
||||
|
||||
func TestTrieCopyPreservesArchiveResolver(t *testing.T) {
|
||||
records := []*archive.Record{
|
||||
{Path: []byte{0x01, 0x02, 16}, Value: []byte("testvalue")},
|
||||
}
|
||||
offset, size, cleanup := setupTestArchive(t, records)
|
||||
defer cleanup()
|
||||
|
||||
tr := NewEmpty(nil)
|
||||
tr.root = &expiredNode{offset: offset, size: size}
|
||||
|
||||
trCopy := tr.Copy()
|
||||
|
||||
val, err := trCopy.Get([]byte{0x12})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if string(val) != "testvalue" {
|
||||
t.Errorf("value mismatch: got %q, want %q", val, "testvalue")
|
||||
}
|
||||
}
|
||||
|
||||
func TestWalkWithExpiredNodes(t *testing.T) {
|
||||
records := []*archive.Record{
|
||||
{Path: []byte{0x01, 0x02, 16}, Value: []byte("value1")},
|
||||
{Path: []byte{0x04, 0x05, 16}, Value: []byte("value2")},
|
||||
{Path: []byte{0x07, 0x08, 16}, Value: []byte("value3")},
|
||||
}
|
||||
offset, size, cleanup := setupTestArchive(t, records)
|
||||
defer cleanup()
|
||||
|
||||
tr := NewEmpty(nil)
|
||||
tr.root = &expiredNode{offset: offset, size: size}
|
||||
|
||||
var leaves []string
|
||||
stats, err := tr.Walk(func(path []byte, value []byte) error {
|
||||
leaves = append(leaves, string(value))
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Walk failed: %v", err)
|
||||
}
|
||||
if stats.Leaves != 3 {
|
||||
t.Errorf("expected 3 leaves, got %d", stats.Leaves)
|
||||
}
|
||||
if stats.ExpiredResolved != 1 {
|
||||
t.Errorf("expected 1 expired resolved, got %d", stats.ExpiredResolved)
|
||||
}
|
||||
// Verify all values were visited
|
||||
expected := map[string]bool{"value1": true, "value2": true, "value3": true}
|
||||
for _, leaf := range leaves {
|
||||
if !expected[leaf] {
|
||||
t.Errorf("unexpected leaf value: %q", leaf)
|
||||
}
|
||||
delete(expected, leaf)
|
||||
}
|
||||
if len(expected) > 0 {
|
||||
t.Errorf("missing leaves: %v", expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWalkEmptyTrie(t *testing.T) {
|
||||
tr := NewEmpty(nil)
|
||||
stats, err := tr.Walk(func(path []byte, value []byte) error {
|
||||
t.Error("callback should not be called for empty trie")
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Walk failed: %v", err)
|
||||
}
|
||||
if stats.Leaves != 0 || stats.ExpiredResolved != 0 {
|
||||
t.Errorf("expected zero stats for empty trie, got leaves=%d expired=%d", stats.Leaves, stats.ExpiredResolved)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWalkCallbackError(t *testing.T) {
|
||||
records := []*archive.Record{
|
||||
{Path: []byte{0x01, 0x02, 16}, Value: []byte("value1")},
|
||||
}
|
||||
offset, size, cleanup := setupTestArchive(t, records)
|
||||
defer cleanup()
|
||||
|
||||
tr := NewEmpty(nil)
|
||||
tr.root = &expiredNode{offset: offset, size: size}
|
||||
|
||||
testErr := errors.New("test error")
|
||||
_, err := tr.Walk(func(path []byte, value []byte) error {
|
||||
return testErr
|
||||
})
|
||||
if !errors.Is(err, testErr) {
|
||||
t.Fatalf("expected test error, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestExpiredNodeResolvedSubtreeDirty verifies that when an expired node is
|
||||
// resolved and a sibling leaf is modified, the commit captures ALL resolved
|
||||
// nodes (not just the modified path). Without this fix, resolved-but-unmodified
|
||||
// nodes would be lost: not in the diff layer (clean) and not in the raw DB
|
||||
// (deleted by archiver).
|
||||
func TestExpiredNodeResolvedSubtreeDirty(t *testing.T) {
|
||||
// Use large values (>32 bytes) so leaf nodes are NOT embedded in
|
||||
// their parent. This matches production storage tries where
|
||||
// intermediate nodes are large enough to be stored independently.
|
||||
bigVal1 := bytes.Repeat([]byte("A"), 40)
|
||||
bigVal2 := bytes.Repeat([]byte("B"), 40)
|
||||
|
||||
// Create an archive with records under different branches.
|
||||
records := []*archive.Record{
|
||||
{Path: []byte{0x01, 0x02, 16}, Value: bigVal1},
|
||||
{Path: []byte{0x04, 0x05, 16}, Value: bigVal2},
|
||||
}
|
||||
offset, size, cleanup := setupTestArchive(t, records)
|
||||
defer cleanup()
|
||||
|
||||
tr := NewEmpty(nil)
|
||||
tr.root = &expiredNode{offset: offset, size: size}
|
||||
|
||||
// Insert a value that goes through one branch of the resolved subtree.
|
||||
// This modifies path [1, ...] but leaves path [4, ...] unmodified.
|
||||
if err := tr.Update([]byte{0x12}, bytes.Repeat([]byte("C"), 40)); err != nil {
|
||||
t.Fatalf("Update failed: %v", err)
|
||||
}
|
||||
|
||||
// Commit the trie. The NodeSet should be non-nil because we modified data.
|
||||
_, nodes := tr.Commit(false)
|
||||
if nodes == nil {
|
||||
t.Fatal("expected non-nil NodeSet after modifying expired subtree")
|
||||
}
|
||||
|
||||
// The resolved-but-unmodified sibling (path [4, 5]) should also be
|
||||
// captured in the NodeSet, because markSubtreeDirty ensures all resolved
|
||||
// nodes are dirty. Count the nodes to verify.
|
||||
nodeCount := len(nodes.Nodes)
|
||||
// We expect at least 3 nodes: the root, the modified branch, and the
|
||||
// sibling branch. The exact count depends on trie structure.
|
||||
if nodeCount < 3 {
|
||||
t.Errorf("expected at least 3 nodes in NodeSet (root + modified + sibling), got %d", nodeCount)
|
||||
}
|
||||
}
|
||||
|
||||
// TestMarkSubtreeDirty verifies that markSubtreeDirty correctly sets the dirty
|
||||
// flag on all nodes in a subtree while preserving cached hashes.
|
||||
func TestMarkSubtreeDirty(t *testing.T) {
|
||||
// Build a small trie structure
|
||||
leaf1 := &shortNode{Key: []byte{1, 16}, Val: valueNode("v1")}
|
||||
leaf2 := &shortNode{Key: []byte{2, 16}, Val: valueNode("v2")}
|
||||
branch := &fullNode{}
|
||||
branch.Children[1] = leaf1
|
||||
branch.Children[2] = leaf2
|
||||
|
||||
// Set hash but not dirty (as if loaded from DB)
|
||||
branch.flags = nodeFlag{hash: hashNode("testhash"), dirty: false}
|
||||
leaf1.flags = nodeFlag{hash: hashNode("hash1"), dirty: false}
|
||||
leaf2.flags = nodeFlag{hash: hashNode("hash2"), dirty: false}
|
||||
|
||||
markSubtreeDirty(branch)
|
||||
|
||||
// All nodes should be dirty
|
||||
if !branch.flags.dirty {
|
||||
t.Error("branch should be dirty")
|
||||
}
|
||||
if !leaf1.flags.dirty {
|
||||
t.Error("leaf1 should be dirty")
|
||||
}
|
||||
if !leaf2.flags.dirty {
|
||||
t.Error("leaf2 should be dirty")
|
||||
}
|
||||
|
||||
// Hashes should be preserved
|
||||
if !bytes.Equal(branch.flags.hash, hashNode("testhash")) {
|
||||
t.Error("branch hash should be preserved")
|
||||
}
|
||||
if !bytes.Equal(leaf1.flags.hash, hashNode("hash1")) {
|
||||
t.Error("leaf1 hash should be preserved")
|
||||
}
|
||||
if !bytes.Equal(leaf2.flags.hash, hashNode("hash2")) {
|
||||
t.Error("leaf2 hash should be preserved")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpiredNodeGetNode(t *testing.T) {
|
||||
records := []*archive.Record{
|
||||
{Path: []byte{0x01, 0x02, 16}, Value: []byte("testvalue")},
|
||||
}
|
||||
offset, size, cleanup := setupTestArchive(t, records)
|
||||
defer cleanup()
|
||||
|
||||
tr := NewEmpty(nil)
|
||||
tr.root = &expiredNode{offset: offset, size: size}
|
||||
|
||||
_, _, err := tr.GetNode(hexToCompact([]byte{0x01, 0x02}))
|
||||
if err != nil && err.Error() != "non-consensus node" {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
}
|
||||
|
|
@ -18,6 +18,7 @@ package trie
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
|
|
@ -97,6 +98,22 @@ func (h *hasher) hash(n node, force bool) []byte {
|
|||
// hash nodes don't have children, so they're left as were
|
||||
return n
|
||||
|
||||
case *expiredNode:
|
||||
// Return the original subtree hash that was cached when the
|
||||
// expired node was decoded. The parent node references this
|
||||
// hash, so we must return the same value to keep the Merkle
|
||||
// root consistent.
|
||||
if n.cachedHash != nil {
|
||||
return n.cachedHash
|
||||
}
|
||||
// Fallback: hash the marker blob (should not happen in practice
|
||||
// because decodeNodeUnsafe always provides the hash).
|
||||
var buf [1 + 2*8]byte // 17 bytes
|
||||
buf[0] = expiredNodeMarker
|
||||
binary.BigEndian.PutUint64(buf[1:], n.offset)
|
||||
binary.BigEndian.PutUint64(buf[9:], n.size)
|
||||
return h.hashData(buf[:])
|
||||
|
||||
default:
|
||||
panic(fmt.Errorf("unexpected node type, %T", n))
|
||||
}
|
||||
|
|
@ -214,6 +231,12 @@ func (h *hasher) proofHash(original node) []byte {
|
|||
return bytes.Clone(h.encodeShortNode(n))
|
||||
case *fullNode:
|
||||
return bytes.Clone(h.encodeFullNode(n))
|
||||
case *expiredNode:
|
||||
var buf [1 + 2*8]byte
|
||||
buf[0] = expiredNodeMarker
|
||||
binary.BigEndian.PutUint64(buf[1:], n.offset)
|
||||
binary.BigEndian.PutUint64(buf[9:], n.size)
|
||||
return buf[:]
|
||||
default:
|
||||
panic(fmt.Errorf("unexpected node type, %T", original))
|
||||
}
|
||||
|
|
|
|||
12
trie/node.go
12
trie/node.go
|
|
@ -18,13 +18,16 @@ package trie
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"github.com/ethereum/go-ethereum/common"
|
||||
"github.com/ethereum/go-ethereum/crypto"
|
||||
"github.com/ethereum/go-ethereum/log"
|
||||
"github.com/ethereum/go-ethereum/rlp"
|
||||
"github.com/ethereum/go-ethereum/trie/archive"
|
||||
)
|
||||
|
||||
var indices = []string{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f", "[17]"}
|
||||
|
|
@ -158,6 +161,15 @@ func decodeNodeUnsafe(hash, buf []byte) (node, error) {
|
|||
if len(buf) == 0 {
|
||||
return nil, io.ErrUnexpectedEOF
|
||||
}
|
||||
if buf[0] == expiredNodeMarker {
|
||||
if len(buf) != 1+2*archive.OffsetSize {
|
||||
return nil, fmt.Errorf("invalid expired node length: %d", len(buf))
|
||||
}
|
||||
offset := binary.BigEndian.Uint64(buf[1:])
|
||||
size := binary.BigEndian.Uint64(buf[1+archive.OffsetSize:])
|
||||
log.Debug("Decoded expired node", "offset", offset, "size", size, "hash", common.BytesToHash(hash))
|
||||
return &expiredNode{offset: offset, size: size, cachedHash: hashNode(hash), archiveResolver: archive.ArchivedNodeResolver}, nil
|
||||
}
|
||||
elems, _, err := rlp.SplitList(buf)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("decode error: %v", err)
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ import (
|
|||
"github.com/ethereum/go-ethereum/crypto"
|
||||
"github.com/ethereum/go-ethereum/ethdb"
|
||||
"github.com/ethereum/go-ethereum/log"
|
||||
"github.com/ethereum/go-ethereum/trie/archive"
|
||||
)
|
||||
|
||||
// Prove constructs a merkle proof for key. The result contains all encoded nodes
|
||||
|
|
@ -78,6 +79,16 @@ func (t *Trie) Prove(key []byte, proofDb ethdb.KeyValueWriter) error {
|
|||
// clean cache or the database, they are all in their own
|
||||
// copy and safe to use unsafe decoder.
|
||||
tn = mustDecodeNodeUnsafe(n, blob)
|
||||
case *expiredNode:
|
||||
records, err := archive.ArchivedNodeResolver(n.offset, n.size)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to resolve expired node in proof: %w", err)
|
||||
}
|
||||
resolved, err := archiveRecordsToNode(records)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to rebuild expired node in proof: %w", err)
|
||||
}
|
||||
tn = resolved
|
||||
default:
|
||||
panic(fmt.Sprintf("%T: invalid node: %v", tn, tn))
|
||||
}
|
||||
|
|
@ -617,6 +628,8 @@ func get(tn node, key []byte, skipResolved bool) ([]byte, node) {
|
|||
}
|
||||
case hashNode:
|
||||
return key, n
|
||||
case *expiredNode:
|
||||
return key, n
|
||||
case nil:
|
||||
return key, nil
|
||||
case valueNode:
|
||||
|
|
|
|||
131
trie/trie.go
131
trie/trie.go
|
|
@ -26,6 +26,7 @@ import (
|
|||
"github.com/ethereum/go-ethereum/common"
|
||||
"github.com/ethereum/go-ethereum/core/types"
|
||||
"github.com/ethereum/go-ethereum/log"
|
||||
"github.com/ethereum/go-ethereum/trie/archive"
|
||||
"github.com/ethereum/go-ethereum/trie/trienode"
|
||||
"github.com/ethereum/go-ethereum/triedb/database"
|
||||
"golang.org/x/sync/errgroup"
|
||||
|
|
@ -57,6 +58,10 @@ type Trie struct {
|
|||
// reader is the handler trie can retrieve nodes from.
|
||||
reader *Reader
|
||||
|
||||
// archiveResolver is an optional callback to resolve expired nodes from
|
||||
// an archive file.
|
||||
archiveResolver archive.ResolverFn
|
||||
|
||||
// Various tracers for capturing the modifications to trie
|
||||
opTracer *opTracer
|
||||
prevalueTracer *PrevalueTracer
|
||||
|
|
@ -70,17 +75,23 @@ func (t *Trie) newFlag() nodeFlag {
|
|||
// Copy returns a copy of Trie.
|
||||
func (t *Trie) Copy() *Trie {
|
||||
return &Trie{
|
||||
root: copyNode(t.root),
|
||||
owner: t.owner,
|
||||
committed: t.committed,
|
||||
unhashed: t.unhashed,
|
||||
uncommitted: t.uncommitted,
|
||||
reader: t.reader,
|
||||
opTracer: t.opTracer.copy(),
|
||||
prevalueTracer: t.prevalueTracer.Copy(),
|
||||
root: copyNode(t.root),
|
||||
owner: t.owner,
|
||||
committed: t.committed,
|
||||
unhashed: t.unhashed,
|
||||
uncommitted: t.uncommitted,
|
||||
reader: t.reader,
|
||||
archiveResolver: t.archiveResolver,
|
||||
opTracer: t.opTracer.copy(),
|
||||
prevalueTracer: t.prevalueTracer.Copy(),
|
||||
}
|
||||
}
|
||||
|
||||
// SetArchiveResolver sets the archive resolver callback for expired nodes.
|
||||
func (t *Trie) SetArchiveResolver(resolver archive.ResolverFn) {
|
||||
t.archiveResolver = resolver
|
||||
}
|
||||
|
||||
// New creates the trie instance with provided trie id and the read-only
|
||||
// database. The state specified by trie id must be available, otherwise
|
||||
// an error will be returned. The trie root specified by trie id can be
|
||||
|
|
@ -218,6 +229,14 @@ func (t *Trie) get(origNode node, key []byte, pos int) (value []byte, newnode no
|
|||
}
|
||||
value, newnode, _, err := t.get(child, key, pos)
|
||||
return value, newnode, true, err
|
||||
case *expiredNode:
|
||||
log.Debug("Resolving expired node in get()", "owner", t.owner, "offset", n.offset, "size", n.size, "pos", pos)
|
||||
newnode, err := resolveExpiredNodeData(n)
|
||||
if err != nil {
|
||||
return nil, n, false, err
|
||||
}
|
||||
value, _, _, err = t.get(newnode, key, pos)
|
||||
return value, newnode, true, err
|
||||
default:
|
||||
panic(fmt.Sprintf("%T: invalid node: %v", origNode, origNode))
|
||||
}
|
||||
|
|
@ -352,6 +371,14 @@ func (t *Trie) getNode(origNode node, path []byte, pos int) (item []byte, newnod
|
|||
item, newnode, resolved, err := t.getNode(child, path, pos)
|
||||
return item, newnode, resolved + 1, err
|
||||
|
||||
case *expiredNode:
|
||||
rn, err := resolveExpiredNodeData(n)
|
||||
if err != nil {
|
||||
return nil, n, 0, err
|
||||
}
|
||||
item, newnode, resolvedCount, err := t.getNode(rn, path, pos)
|
||||
return item, newnode, resolvedCount + 1, err
|
||||
|
||||
default:
|
||||
panic(fmt.Sprintf("%T: invalid node: %v", origNode, origNode))
|
||||
}
|
||||
|
|
@ -475,6 +502,18 @@ func (t *Trie) insert(n node, prefix, key []byte, value node) (bool, node, error
|
|||
}
|
||||
return true, nn, nil
|
||||
|
||||
case *expiredNode:
|
||||
log.Debug("Resolving expired node in insert()", "owner", t.owner, "offset", n.offset, "size", n.size)
|
||||
rn, err := resolveExpiredNodeData(n)
|
||||
if err != nil {
|
||||
return false, nil, err
|
||||
}
|
||||
dirty, nn, err := t.insert(rn, prefix, key, value)
|
||||
if !dirty || err != nil {
|
||||
return false, rn, err
|
||||
}
|
||||
return true, nn, nil
|
||||
|
||||
default:
|
||||
panic(fmt.Sprintf("%T: invalid node: %v", n, n))
|
||||
}
|
||||
|
|
@ -636,6 +675,18 @@ func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) {
|
|||
}
|
||||
return true, nn, nil
|
||||
|
||||
case *expiredNode:
|
||||
log.Debug("Resolving expired node in delete()", "owner", t.owner, "offset", n.offset, "size", n.size)
|
||||
rn, err := resolveExpiredNodeData(n)
|
||||
if err != nil {
|
||||
return false, nil, err
|
||||
}
|
||||
dirty, nn, err := t.delete(rn, prefix, key)
|
||||
if !dirty || err != nil {
|
||||
return false, rn, err
|
||||
}
|
||||
return true, nn, nil
|
||||
|
||||
default:
|
||||
panic(fmt.Sprintf("%T: invalid node: %v (%v)", n, n, key))
|
||||
}
|
||||
|
|
@ -666,14 +717,24 @@ func copyNode(n node) node {
|
|||
}
|
||||
case hashNode:
|
||||
return n
|
||||
case *expiredNode:
|
||||
return &expiredNode{
|
||||
offset: n.offset,
|
||||
size: n.size,
|
||||
cachedHash: common.CopyBytes(n.cachedHash),
|
||||
archiveResolver: n.archiveResolver,
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("%T: unknown node type", n))
|
||||
}
|
||||
}
|
||||
|
||||
func (t *Trie) resolve(n node, prefix []byte) (node, error) {
|
||||
if n, ok := n.(hashNode); ok {
|
||||
switch n := n.(type) {
|
||||
case hashNode:
|
||||
return t.resolveAndTrack(n, prefix)
|
||||
case *expiredNode:
|
||||
return resolveExpiredNodeData(n)
|
||||
}
|
||||
return n, nil
|
||||
}
|
||||
|
|
@ -784,6 +845,58 @@ func (t *Trie) Witness() map[string][]byte {
|
|||
return t.prevalueTracer.Values()
|
||||
}
|
||||
|
||||
// WalkStats holds statistics from a Walk traversal.
|
||||
type WalkStats struct {
|
||||
Leaves int // Number of leaf nodes visited
|
||||
ExpiredResolved int // Number of expired nodes resolved from archive
|
||||
}
|
||||
|
||||
// Walk recursively traverses the trie, resolving all nodes including
|
||||
// hashNodes and expiredNodes. It calls fn for each leaf found.
|
||||
// This triggers hash verification for expired nodes via cachedHash.
|
||||
func (t *Trie) Walk(fn func(path []byte, value []byte) error) (WalkStats, error) {
|
||||
return t.walk(t.root, nil, fn)
|
||||
}
|
||||
|
||||
func (t *Trie) walk(n node, path []byte, fn func([]byte, []byte) error) (WalkStats, error) {
|
||||
switch n := n.(type) {
|
||||
case *shortNode:
|
||||
return t.walk(n.Val, append(append([]byte{}, path...), n.Key...), fn)
|
||||
case *fullNode:
|
||||
var stats WalkStats
|
||||
for i, child := range n.Children[:16] {
|
||||
if child != nil {
|
||||
childStats, err := t.walk(child, append(append([]byte{}, path...), byte(i)), fn)
|
||||
if err != nil {
|
||||
return stats, err
|
||||
}
|
||||
stats.Leaves += childStats.Leaves
|
||||
stats.ExpiredResolved += childStats.ExpiredResolved
|
||||
}
|
||||
}
|
||||
return stats, nil
|
||||
case hashNode:
|
||||
resolved, err := t.resolveAndTrack(n, path)
|
||||
if err != nil {
|
||||
return WalkStats{}, err
|
||||
}
|
||||
return t.walk(resolved, path, fn)
|
||||
case *expiredNode:
|
||||
resolved, err := resolveExpiredNodeData(n)
|
||||
if err != nil {
|
||||
return WalkStats{}, err
|
||||
}
|
||||
childStats, err := t.walk(resolved, path, fn)
|
||||
childStats.ExpiredResolved++
|
||||
return childStats, err
|
||||
case valueNode:
|
||||
return WalkStats{Leaves: 1}, fn(path, []byte(n))
|
||||
case nil:
|
||||
return WalkStats{}, nil
|
||||
}
|
||||
return WalkStats{}, nil
|
||||
}
|
||||
|
||||
// reset drops the referenced root node and cleans all internal state.
|
||||
func (t *Trie) reset() {
|
||||
t.root = nil
|
||||
|
|
|
|||
|
|
@ -399,6 +399,28 @@ func (db *Database) Disk() ethdb.Database {
|
|||
return db.disk
|
||||
}
|
||||
|
||||
// DiffHead returns the root hash of the topmost diff layer in pathdb.
|
||||
// If there are no diff layers or the backend is not pathdb, it returns
|
||||
// the zero hash and false.
|
||||
func (db *Database) DiffHead() (common.Hash, bool) {
|
||||
pdb, ok := db.backend.(*pathdb.Database)
|
||||
if !ok {
|
||||
return common.Hash{}, false
|
||||
}
|
||||
return pdb.DiffHead()
|
||||
}
|
||||
|
||||
// DisableStateHistory closes and disables the state history freezer.
|
||||
// This is used by the archiver to bypass state history writes during
|
||||
// diff layer flushing when state history may have gaps.
|
||||
func (db *Database) DisableStateHistory() {
|
||||
pdb, ok := db.backend.(*pathdb.Database)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
pdb.DisableStateHistory()
|
||||
}
|
||||
|
||||
// SnapshotCompleted returns the indicator if the snapshot is completed.
|
||||
func (db *Database) SnapshotCompleted() bool {
|
||||
pdb, ok := db.backend.(*pathdb.Database)
|
||||
|
|
|
|||
|
|
@ -318,6 +318,30 @@ func (db *Database) Update(root common.Hash, parentRoot common.Hash, block uint6
|
|||
return db.tree.cap(root, maxDiffLayers)
|
||||
}
|
||||
|
||||
// DiffHead returns the root hash of the topmost diff layer. If there are no
|
||||
// diff layers (only the disk layer), it returns the disk layer root and false.
|
||||
func (db *Database) DiffHead() (common.Hash, bool) {
|
||||
db.lock.RLock()
|
||||
defer db.lock.RUnlock()
|
||||
|
||||
return db.tree.diffHead()
|
||||
}
|
||||
|
||||
// DisableStateHistory closes and disables the state history freezer. This is
|
||||
// used by the archiver to bypass state history writes during diff layer flushing,
|
||||
// since the archiver only needs trie nodes committed to disk and state history
|
||||
// may have gaps from unclean shutdowns that prevent sequential appends.
|
||||
func (db *Database) DisableStateHistory() {
|
||||
db.lock.Lock()
|
||||
defer db.lock.Unlock()
|
||||
|
||||
if db.stateFreezer != nil {
|
||||
db.stateFreezer.Close()
|
||||
db.stateFreezer = nil
|
||||
log.Info("Disabled state history freezer")
|
||||
}
|
||||
}
|
||||
|
||||
// Commit traverses downwards the layer tree from a specified layer with the
|
||||
// provided state root and all the layers below are flattened downwards. It
|
||||
// can be used alone and mostly for test purposes.
|
||||
|
|
|
|||
|
|
@ -278,9 +278,17 @@ func truncateFromHead(store ethdb.AncientStore, typ historyType, nhead uint64) (
|
|||
return 0, err
|
||||
}
|
||||
// Ensure that the truncation target falls within the valid range.
|
||||
if ohead < nhead || nhead < otail {
|
||||
if nhead < otail {
|
||||
return 0, fmt.Errorf("%w, %s, tail: %d, head: %d, target: %d", errHeadTruncationOutOfRange, typ, otail, ohead, nhead)
|
||||
}
|
||||
// If the target is ahead of the current head, there's nothing to truncate.
|
||||
// This can happen after unclean shutdowns where the state history was not
|
||||
// fully written.
|
||||
if ohead < nhead {
|
||||
log.Warn("State history shorter than target, nothing to truncate",
|
||||
"type", typ.String(), "head", ohead, "target", nhead)
|
||||
return 0, nil
|
||||
}
|
||||
// Short circuit if nothing to truncate.
|
||||
if ohead == nhead {
|
||||
return 0, nil
|
||||
|
|
|
|||
|
|
@ -244,8 +244,8 @@ func TestTruncateOutOfRange(t *testing.T) {
|
|||
target uint64
|
||||
expErr error
|
||||
}{
|
||||
{0, head, nil}, // nothing to delete
|
||||
{0, head + 1, errHeadTruncationOutOfRange},
|
||||
{0, head, nil}, // nothing to delete
|
||||
{0, head + 1, nil}, // gracefully handled after unclean shutdown
|
||||
{0, tail - 1, errHeadTruncationOutOfRange},
|
||||
{1, tail, nil}, // nothing to delete
|
||||
{1, head + 1, errTailTruncationOutOfRange},
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ import (
|
|||
// of the referenced layer by themselves.
|
||||
type layerTree struct {
|
||||
base *diskLayer
|
||||
head common.Hash // Root hash of the topmost layer (diff or disk)
|
||||
layers map[common.Hash]layer
|
||||
|
||||
// descendants is a two-dimensional map where the keys represent
|
||||
|
|
@ -59,6 +60,7 @@ func (tree *layerTree) init(head layer) {
|
|||
defer tree.lock.Unlock()
|
||||
|
||||
current := head
|
||||
tree.head = head.rootHash()
|
||||
tree.layers = make(map[common.Hash]layer)
|
||||
tree.descendants = make(map[common.Hash]map[common.Hash]struct{})
|
||||
|
||||
|
|
@ -76,6 +78,18 @@ func (tree *layerTree) init(head layer) {
|
|||
tree.lookup = newLookup(head, tree.isDescendant)
|
||||
}
|
||||
|
||||
// diffHead returns the root hash of the topmost diff layer. If there are no
|
||||
// diff layers, returns the disk layer root and false.
|
||||
func (tree *layerTree) diffHead() (common.Hash, bool) {
|
||||
tree.lock.RLock()
|
||||
defer tree.lock.RUnlock()
|
||||
|
||||
if _, ok := tree.layers[tree.head].(*diffLayer); ok {
|
||||
return tree.head, true
|
||||
}
|
||||
return tree.base.rootHash(), false
|
||||
}
|
||||
|
||||
// get retrieves a layer belonging to the given state root.
|
||||
func (tree *layerTree) get(root common.Hash) layer {
|
||||
tree.lock.RLock()
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ func (r *reader) Node(owner common.Hash, path []byte, hash common.Hash) ([]byte,
|
|||
return nil, err
|
||||
}
|
||||
// Error out if the local one is inconsistent with the target.
|
||||
if !r.noHashCheck && got != hash {
|
||||
if !r.noHashCheck && (len(blob) > 0 && blob[0] != 0) && got != hash {
|
||||
// Location is always available even if the node
|
||||
// is not found.
|
||||
switch loc.loc {
|
||||
|
|
|
|||
Loading…
Reference in a new issue