1
0
Fork 0
forked from forks/go-ethereum

cmd/geth: add prune history command (#31384)

This adds a new subcommand 'geth prune-history' that removes the pre-merge history
on supported networks. Geth is not fully ready to work in this mode, please do not run
this command on your production node.

---------

Co-authored-by: Felix Lange <fjl@twurst.com>
This commit is contained in:
Sina M 2025-03-21 13:12:56 +01:00 committed by GitHub
parent 1886922264
commit 8fe09df54f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 171 additions and 25 deletions

View file

@ -35,6 +35,7 @@ import (
"github.com/ethereum/go-ethereum/core/state"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/eth/ethconfig"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/internal/era"
"github.com/ethereum/go-ethereum/log"
@ -189,6 +190,18 @@ It's deprecated, please use "geth db import" instead.
This command dumps out the state for a given block (or latest, if none provided).
`,
}
pruneCommand = &cli.Command{
Action: pruneHistory,
Name: "prune-history",
Usage: "Prune blockchain history (block bodies and receipts) up to the merge block",
ArgsUsage: "",
Flags: utils.DatabaseFlags,
Description: `
The prune-history command removes historical block bodies and receipts from the
blockchain database up to the merge block, while preserving block headers. This
helps reduce storage requirements for nodes that don't need full historical data.`,
}
)
// initGenesis will initialise the given JSON format genesis file and writes it as
@ -598,3 +611,51 @@ func hashish(x string) bool {
_, err := strconv.Atoi(x)
return err != nil
}
func pruneHistory(ctx *cli.Context) error {
stack, _ := makeConfigNode(ctx)
defer stack.Close()
// Open the chain database
chain, chaindb := utils.MakeChain(ctx, stack, false)
defer chaindb.Close()
defer chain.Stop()
// Determine the prune point. This will be the first PoS block.
prunePoint, ok := ethconfig.HistoryPrunePoints[chain.Genesis().Hash()]
if !ok || prunePoint == nil {
return errors.New("prune point not found")
}
var (
mergeBlock = prunePoint.BlockNumber
mergeBlockHash = prunePoint.BlockHash.Hex()
)
// Check we're far enough past merge to ensure all data is in freezer
currentHeader := chain.CurrentHeader()
if currentHeader == nil {
return errors.New("current header not found")
}
if currentHeader.Number.Uint64() < mergeBlock+params.FullImmutabilityThreshold {
return fmt.Errorf("chain not far enough past merge block, need %d more blocks",
mergeBlock+params.FullImmutabilityThreshold-currentHeader.Number.Uint64())
}
// Double-check the prune block in db has the expected hash.
hash := rawdb.ReadCanonicalHash(chaindb, mergeBlock)
if hash != common.HexToHash(mergeBlockHash) {
return fmt.Errorf("merge block hash mismatch: got %s, want %s", hash.Hex(), mergeBlockHash)
}
log.Info("Starting history pruning", "head", currentHeader.Number, "tail", mergeBlock, "tailHash", mergeBlockHash)
start := time.Now()
rawdb.PruneTransactionIndex(chaindb, mergeBlock)
if _, err := chaindb.TruncateTail(mergeBlock); err != nil {
return fmt.Errorf("failed to truncate ancient data: %v", err)
}
log.Info("History pruning completed", "tail", mergeBlock, "elapsed", common.PrettyDuration(time.Since(start)))
// TODO(s1na): what if there is a crash between the two prune operations?
return nil
}

View file

@ -226,6 +226,7 @@ func init() {
removedbCommand,
dumpCommand,
dumpGenesisCommand,
pruneCommand,
// See accountcmd.go:
accountCommand,
walletCommand,

View file

@ -332,7 +332,8 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, genesis *Genesis
bc.prefetcher = newStatePrefetcher(chainConfig, bc.hc)
bc.processor = NewStateProcessor(chainConfig, bc.hc)
bc.genesisBlock = bc.GetBlockByNumber(0)
genesisHeader := bc.GetHeaderByNumber(0)
bc.genesisBlock = types.NewBlockWithHeader(genesisHeader)
if bc.genesisBlock == nil {
return nil, ErrNoGenesis
}

View file

@ -103,13 +103,14 @@ func DeleteTxLookupEntries(db ethdb.KeyValueWriter, hashes []common.Hash) {
// DeleteAllTxLookupEntries purges all the transaction indexes in the database.
// If condition is specified, only the entry with condition as True will be
// removed; If condition is not specified, the entry is deleted.
func DeleteAllTxLookupEntries(db ethdb.KeyValueStore, condition func([]byte) bool) {
func DeleteAllTxLookupEntries(db ethdb.KeyValueStore, condition func(common.Hash, []byte) bool) {
iter := NewKeyLengthIterator(db.NewIterator(txLookupPrefix, nil), common.HashLength+len(txLookupPrefix))
defer iter.Release()
batch := db.NewBatch()
for iter.Next() {
if condition == nil || condition(iter.Value()) {
txhash := common.Hash(iter.Key()[1:])
if condition == nil || condition(txhash, iter.Value()) {
batch.Delete(iter.Key())
}
if batch.ValueSize() >= ethdb.IdealBatchSize {

View file

@ -17,6 +17,7 @@
package rawdb
import (
"encoding/binary"
"runtime"
"sync/atomic"
"time"
@ -361,3 +362,38 @@ func UnindexTransactions(db ethdb.Database, from uint64, to uint64, interrupt ch
func unindexTransactionsForTesting(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) {
unindexTransactions(db, from, to, interrupt, hook, false)
}
// PruneTransactionIndex removes all tx index entries below a certain block number.
func PruneTransactionIndex(db ethdb.Database, pruneBlock uint64) {
tail := ReadTxIndexTail(db)
if tail == nil || *tail > pruneBlock {
return // no index, or index ends above pruneBlock
}
// There are blocks below pruneBlock in the index. Iterate the entire index to remove
// their entries. Note if this fails, the index is messed up, but tail still points to
// the old tail.
var count, removed int
DeleteAllTxLookupEntries(db, func(txhash common.Hash, v []byte) bool {
count++
if count%10000000 == 0 {
log.Info("Pruning tx index", "count", count, "removed", removed)
}
if len(v) > 8 {
log.Error("Skipping legacy tx index entry", "hash", txhash)
return false
}
bn := decodeNumber(v)
if bn < pruneBlock {
removed++
return true
}
return false
})
WriteTxIndexTail(db, pruneBlock)
}
func decodeNumber(b []byte) uint64 {
var numBuffer [8]byte
copy(numBuffer[8-len(b):], b)
return binary.BigEndian.Uint64(numBuffer[:])
}

View file

@ -25,6 +25,7 @@ import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/ethdb"
)
func TestChainIterator(t *testing.T) {
@ -102,19 +103,18 @@ func TestChainIterator(t *testing.T) {
}
}
func TestIndexTransactions(t *testing.T) {
// Construct test chain db
chainDb := NewMemoryDatabase()
var block *types.Block
func initDatabaseWithTransactions(db ethdb.Database) ([]*types.Block, []*types.Transaction) {
var blocks []*types.Block
var txs []*types.Transaction
to := common.BytesToAddress([]byte{0x11})
// Write empty genesis block
block = types.NewBlock(&types.Header{Number: big.NewInt(int64(0))}, nil, nil, newTestHasher())
WriteBlock(chainDb, block)
WriteCanonicalHash(chainDb, block.Hash(), block.NumberU64())
block := types.NewBlock(&types.Header{Number: big.NewInt(int64(0))}, nil, nil, newTestHasher())
WriteBlock(db, block)
WriteCanonicalHash(db, block.Hash(), block.NumberU64())
blocks = append(blocks, block)
// Create transactions.
for i := uint64(1); i <= 10; i++ {
var tx *types.Transaction
if i%2 == 0 {
@ -138,10 +138,21 @@ func TestIndexTransactions(t *testing.T) {
})
}
txs = append(txs, tx)
block = types.NewBlock(&types.Header{Number: big.NewInt(int64(i))}, &types.Body{Transactions: types.Transactions{tx}}, nil, newTestHasher())
WriteBlock(chainDb, block)
WriteCanonicalHash(chainDb, block.Hash(), block.NumberU64())
block := types.NewBlock(&types.Header{Number: big.NewInt(int64(i))}, &types.Body{Transactions: types.Transactions{tx}}, nil, newTestHasher())
WriteBlock(db, block)
WriteCanonicalHash(db, block.Hash(), block.NumberU64())
blocks = append(blocks, block)
}
return blocks, txs
}
func TestIndexTransactions(t *testing.T) {
// Construct test chain db
chainDB := NewMemoryDatabase()
_, txs := initDatabaseWithTransactions(chainDB)
// verify checks whether the tx indices in the range [from, to)
// is expected.
verify := func(from, to int, exist bool, tail uint64) {
@ -149,7 +160,7 @@ func TestIndexTransactions(t *testing.T) {
if i == 0 {
continue
}
number := ReadTxLookupEntry(chainDb, txs[i-1].Hash())
number := ReadTxLookupEntry(chainDB, txs[i-1].Hash())
if exist && number == nil {
t.Fatalf("Transaction index %d missing", i)
}
@ -157,29 +168,29 @@ func TestIndexTransactions(t *testing.T) {
t.Fatalf("Transaction index %d is not deleted", i)
}
}
number := ReadTxIndexTail(chainDb)
number := ReadTxIndexTail(chainDB)
if number == nil || *number != tail {
t.Fatalf("Transaction tail mismatch")
}
}
IndexTransactions(chainDb, 5, 11, nil, false)
IndexTransactions(chainDB, 5, 11, nil, false)
verify(5, 11, true, 5)
verify(0, 5, false, 5)
IndexTransactions(chainDb, 0, 5, nil, false)
IndexTransactions(chainDB, 0, 5, nil, false)
verify(0, 11, true, 0)
UnindexTransactions(chainDb, 0, 5, nil, false)
UnindexTransactions(chainDB, 0, 5, nil, false)
verify(5, 11, true, 5)
verify(0, 5, false, 5)
UnindexTransactions(chainDb, 5, 11, nil, false)
UnindexTransactions(chainDB, 5, 11, nil, false)
verify(0, 11, false, 11)
// Testing corner cases
signal := make(chan struct{})
var once sync.Once
indexTransactionsForTesting(chainDb, 5, 11, signal, func(n uint64) bool {
indexTransactionsForTesting(chainDB, 5, 11, signal, func(n uint64) bool {
if n <= 8 {
once.Do(func() {
close(signal)
@ -190,11 +201,11 @@ func TestIndexTransactions(t *testing.T) {
})
verify(9, 11, true, 9)
verify(0, 9, false, 9)
IndexTransactions(chainDb, 0, 9, nil, false)
IndexTransactions(chainDB, 0, 9, nil, false)
signal = make(chan struct{})
var once2 sync.Once
unindexTransactionsForTesting(chainDb, 0, 11, signal, func(n uint64) bool {
unindexTransactionsForTesting(chainDB, 0, 11, signal, func(n uint64) bool {
if n >= 8 {
once2.Do(func() {
close(signal)
@ -206,3 +217,37 @@ func TestIndexTransactions(t *testing.T) {
verify(8, 11, true, 8)
verify(0, 8, false, 8)
}
func TestPruneTransactionIndex(t *testing.T) {
chainDB := NewMemoryDatabase()
blocks, _ := initDatabaseWithTransactions(chainDB)
lastBlock := blocks[len(blocks)-1].NumberU64()
pruneBlock := lastBlock - 3
IndexTransactions(chainDB, 0, lastBlock+1, nil, false)
// Check all transactions are in index.
for _, block := range blocks {
for _, tx := range block.Transactions() {
num := ReadTxLookupEntry(chainDB, tx.Hash())
if num == nil || *num != block.NumberU64() {
t.Fatalf("wrong TxLookup entry: %x -> %v", tx.Hash(), num)
}
}
}
PruneTransactionIndex(chainDB, pruneBlock)
// Check transactions from old blocks not included.
for _, block := range blocks {
for _, tx := range block.Transactions() {
num := ReadTxLookupEntry(chainDB, tx.Hash())
if block.NumberU64() < pruneBlock && num != nil {
t.Fatalf("TxLookup entry not removed: %x -> %v", tx.Hash(), num)
}
if block.NumberU64() >= pruneBlock && (num == nil || *num != block.NumberU64()) {
t.Fatalf("wrong TxLookup entry after pruning: %x -> %v", tx.Hash(), num)
}
}
}
}

View file

@ -20,6 +20,7 @@ import (
"errors"
"fmt"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
@ -186,7 +187,7 @@ func (indexer *txIndexer) repair(head uint64) {
// potentially leaving dangling indexes in the database.
// However, this is considered acceptable.
rawdb.WriteTxIndexTail(indexer.db, indexer.cutoff)
rawdb.DeleteAllTxLookupEntries(indexer.db, func(blob []byte) bool {
rawdb.DeleteAllTxLookupEntries(indexer.db, func(txhash common.Hash, blob []byte) bool {
n := rawdb.DecodeTxLookupEntry(blob, indexer.db)
return n != nil && *n < indexer.cutoff
})

@ -1 +1 @@
Subproject commit 81862e4848585a438d64f911a19b3825f0f4cd95
Subproject commit faf33b471465d3c6cdc3d04fbd690895f78d33f2