mirror of
https://github.com/ethereum/go-ethereum.git
synced 2026-02-26 07:37:20 +00:00
This is something interesting I came across during my benchmarks, we
spent ~3.8% of all allocations allocating the header number on the heap.
```
(pprof) list GetHeaderByHash
Total: 38197204475
ROUTINE ======================== github.com/ethereum/go-ethereum/core.(*BlockChain).GetHeaderByHash in github.com/ethereum/go-ethereum/core/blockchain_reader.go
0 5786566117 (flat, cum) 15.15% of Total
. . 79:func (bc *BlockChain) GetHeaderByHash(hash common.Hash) *types.Header {
. 5786566117 80: return bc.hc.GetHeaderByHash(hash)
. . 81:}
. . 82:
. . 83:// GetHeaderByNumber retrieves a block header from the database by number,
. . 84:// caching it (associated with its hash) if found.
. . 85:func (bc *BlockChain) GetHeaderByNumber(number uint64) *types.Header {
ROUTINE ======================== github.com/ethereum/go-ethereum/core.(*HeaderChain).GetHeaderByHash in github.com/ethereum/go-ethereum/core/headerchain.go
0 5786566117 (flat, cum) 15.15% of Total
. . 404:func (hc *HeaderChain) GetHeaderByHash(hash common.Hash) *types.Header {
. 1471264309 405: number := hc.GetBlockNumber(hash)
. . 406: if number == nil {
. . 407: return nil
. . 408: }
. 4315301808 409: return hc.GetHeader(hash, *number)
. . 410:}
. . 411:
. . 412:// HasHeader checks if a block header is present in the database or not.
. . 413:// In theory, if header is present in the database, all relative components
. . 414:// like td and hash->number should be present too.
(pprof) list GetBlockNumber
Total: 38197204475
ROUTINE ======================== github.com/ethereum/go-ethereum/core.(*HeaderChain).GetBlockNumber in github.com/ethereum/go-ethereum/core/headerchain.go
94438817 1471264309 (flat, cum) 3.85% of Total
. . 100:func (hc *HeaderChain) GetBlockNumber(hash common.Hash) *uint64 {
94438817 94438817 101: if cached, ok := hc.numberCache.Get(hash); ok {
. . 102: return &cached
. . 103: }
. 1376270828 104: number := rawdb.ReadHeaderNumber(hc.chainDb, hash)
. . 105: if number != nil {
. 554664 106: hc.numberCache.Add(hash, *number)
. . 107: }
. . 108: return number
. . 109:}
. . 110:
. . 111:type headerWriteResult struct {
(pprof) list ReadHeaderNumber
Total: 38197204475
ROUTINE ======================== github.com/ethereum/go-ethereum/core/rawdb.ReadHeaderNumber in github.com/ethereum/go-ethereum/core/rawdb/accessors_chain.go
204606513 1376270828 (flat, cum) 3.60% of Total
. . 146:func ReadHeaderNumber(db ethdb.KeyValueReader, hash common.Hash) *uint64 {
109577863 1281242178 147: data, _ := db.Get(headerNumberKey(hash))
. . 148: if len(data) != 8 {
. . 149: return nil
. . 150: }
95028650 95028650 151: number := binary.BigEndian.Uint64(data)
. . 152: return &number
. . 153:}
. . 154:
. . 155:// WriteHeaderNumber stores the hash->number mapping.
. . 156:func WriteHeaderNumber(db ethdb.KeyValueWriter, hash common.Hash, number uint64) {
```
Opening this to discuss the idea, I know that rawdb.EmptyNumber is not a
great name for the variable, open to suggestions
333 lines
11 KiB
Go
333 lines
11 KiB
Go
// Copyright 2024 The go-ethereum Authors
|
|
// This file is part of the go-ethereum library.
|
|
//
|
|
// The go-ethereum library is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Lesser General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// The go-ethereum library is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Lesser General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
package core
|
|
|
|
import (
|
|
"fmt"
|
|
"sync/atomic"
|
|
|
|
"github.com/ethereum/go-ethereum/common"
|
|
"github.com/ethereum/go-ethereum/core/rawdb"
|
|
"github.com/ethereum/go-ethereum/ethdb"
|
|
"github.com/ethereum/go-ethereum/log"
|
|
)
|
|
|
|
// TxIndexProgress is the struct describing the progress for transaction indexing.
|
|
type TxIndexProgress struct {
|
|
Indexed uint64 // number of blocks whose transactions are indexed
|
|
Remaining uint64 // number of blocks whose transactions are not indexed yet
|
|
}
|
|
|
|
// Done returns an indicator if the transaction indexing is finished.
|
|
func (progress TxIndexProgress) Done() bool {
|
|
return progress.Remaining == 0
|
|
}
|
|
|
|
// txIndexer is the module responsible for maintaining transaction indexes
|
|
// according to the configured indexing range by users.
|
|
type txIndexer struct {
|
|
// limit is the maximum number of blocks from head whose tx indexes
|
|
// are reserved:
|
|
// * 0: means the entire chain should be indexed
|
|
// * N: means the latest N blocks [HEAD-N+1, HEAD] should be indexed
|
|
// and all others shouldn't.
|
|
limit uint64
|
|
|
|
// The current head of blockchain for transaction indexing. This field
|
|
// is accessed by both the indexer and the indexing progress queries.
|
|
head atomic.Uint64
|
|
|
|
// The current tail of the indexed transactions, null indicates
|
|
// that no transactions have been indexed yet.
|
|
//
|
|
// This field is accessed by both the indexer and the indexing
|
|
// progress queries.
|
|
tail atomic.Pointer[uint64]
|
|
|
|
// cutoff denotes the block number before which the chain segment should
|
|
// be pruned and not available locally.
|
|
cutoff uint64
|
|
db ethdb.Database
|
|
term chan chan struct{}
|
|
closed chan struct{}
|
|
}
|
|
|
|
// newTxIndexer initializes the transaction indexer.
|
|
func newTxIndexer(limit uint64, chain *BlockChain) *txIndexer {
|
|
cutoff, _ := chain.HistoryPruningCutoff()
|
|
indexer := &txIndexer{
|
|
limit: limit,
|
|
cutoff: cutoff,
|
|
db: chain.db,
|
|
term: make(chan chan struct{}),
|
|
closed: make(chan struct{}),
|
|
}
|
|
indexer.head.Store(indexer.resolveHead())
|
|
indexer.tail.Store(rawdb.ReadTxIndexTail(chain.db))
|
|
|
|
go indexer.loop(chain)
|
|
|
|
var msg string
|
|
if limit == 0 {
|
|
if indexer.cutoff == 0 {
|
|
msg = "entire chain"
|
|
} else {
|
|
msg = fmt.Sprintf("blocks since #%d", indexer.cutoff)
|
|
}
|
|
} else {
|
|
msg = fmt.Sprintf("last %d blocks", limit)
|
|
}
|
|
log.Info("Initialized transaction indexer", "range", msg)
|
|
|
|
return indexer
|
|
}
|
|
|
|
// run executes the scheduled indexing/unindexing task in a separate thread.
|
|
// If the stop channel is closed, the task should terminate as soon as possible.
|
|
// The done channel will be closed once the task is complete.
|
|
//
|
|
// Existing transaction indexes are assumed to be valid, with both the head
|
|
// and tail above the configured cutoff.
|
|
func (indexer *txIndexer) run(head uint64, stop chan struct{}, done chan struct{}) {
|
|
defer func() { close(done) }()
|
|
|
|
// Short circuit if the chain is either empty, or entirely below the
|
|
// cutoff point.
|
|
if head == 0 || head < indexer.cutoff {
|
|
return
|
|
}
|
|
// The tail flag is not existent, it means the node is just initialized
|
|
// and all blocks in the chain (part of them may from ancient store) are
|
|
// not indexed yet, index the chain according to the configured limit.
|
|
tail := rawdb.ReadTxIndexTail(indexer.db)
|
|
if tail == nil {
|
|
// Determine the first block for transaction indexing, taking the
|
|
// configured cutoff point into account.
|
|
from := uint64(0)
|
|
if indexer.limit != 0 && head >= indexer.limit {
|
|
from = head - indexer.limit + 1
|
|
}
|
|
from = max(from, indexer.cutoff)
|
|
rawdb.IndexTransactions(indexer.db, from, head+1, stop, true)
|
|
return
|
|
}
|
|
// The tail flag is existent (which means indexes in [tail, head] should be
|
|
// present), while the whole chain are requested for indexing.
|
|
if indexer.limit == 0 || head < indexer.limit {
|
|
if *tail > 0 {
|
|
from := max(uint64(0), indexer.cutoff)
|
|
rawdb.IndexTransactions(indexer.db, from, *tail, stop, true)
|
|
}
|
|
return
|
|
}
|
|
// The tail flag is existent, adjust the index range according to configured
|
|
// limit and the latest chain head.
|
|
from := head - indexer.limit + 1
|
|
from = max(from, indexer.cutoff)
|
|
if from < *tail {
|
|
// Reindex a part of missing indices and rewind index tail to HEAD-limit
|
|
rawdb.IndexTransactions(indexer.db, from, *tail, stop, true)
|
|
} else {
|
|
// Unindex a part of stale indices and forward index tail to HEAD-limit
|
|
rawdb.UnindexTransactions(indexer.db, *tail, from, stop, false)
|
|
}
|
|
}
|
|
|
|
// repair ensures that transaction indexes are in a valid state and invalidates
|
|
// them if they are not. The following cases are considered invalid:
|
|
// * The index tail is higher than the chain head.
|
|
// * The chain head is below the configured cutoff, but the index tail is not empty.
|
|
// * The index tail is below the configured cutoff, but it is not empty.
|
|
func (indexer *txIndexer) repair(head uint64) {
|
|
// If the transactions haven't been indexed yet, nothing to repair
|
|
tail := rawdb.ReadTxIndexTail(indexer.db)
|
|
if tail == nil {
|
|
return
|
|
}
|
|
// The transaction index tail is higher than the chain head, which may occur
|
|
// when the chain is rewound to a historical height below the index tail.
|
|
// Purge the transaction indexes from the database. **It's not a common case
|
|
// to rewind the chain head below the index tail**.
|
|
if *tail > head {
|
|
// A crash may occur between the two delete operations,
|
|
// potentially leaving dangling indexes in the database.
|
|
// However, this is considered acceptable.
|
|
indexer.tail.Store(nil)
|
|
rawdb.DeleteTxIndexTail(indexer.db)
|
|
rawdb.DeleteAllTxLookupEntries(indexer.db, nil)
|
|
log.Warn("Purge transaction indexes", "head", head, "tail", *tail)
|
|
return
|
|
}
|
|
|
|
// If the entire chain is below the configured cutoff point,
|
|
// removing the tail of transaction indexing and purges the
|
|
// transaction indexes. **It's not a common case, as the cutoff
|
|
// is usually defined below the chain head**.
|
|
if head < indexer.cutoff {
|
|
// A crash may occur between the two delete operations,
|
|
// potentially leaving dangling indexes in the database.
|
|
// However, this is considered acceptable.
|
|
//
|
|
// The leftover indexes can't be unindexed by scanning
|
|
// the blocks as they are not guaranteed to be available.
|
|
// Traversing the database directly within the transaction
|
|
// index namespace might be slow and expensive, but we
|
|
// have no choice.
|
|
indexer.tail.Store(nil)
|
|
rawdb.DeleteTxIndexTail(indexer.db)
|
|
rawdb.DeleteAllTxLookupEntries(indexer.db, nil)
|
|
log.Warn("Purge transaction indexes", "head", head, "cutoff", indexer.cutoff)
|
|
return
|
|
}
|
|
|
|
// The chain head is above the cutoff while the tail is below the
|
|
// cutoff. Shift the tail to the cutoff point and remove the indexes
|
|
// below.
|
|
if *tail < indexer.cutoff {
|
|
// A crash may occur between the two delete operations,
|
|
// potentially leaving dangling indexes in the database.
|
|
// However, this is considered acceptable.
|
|
indexer.tail.Store(&indexer.cutoff)
|
|
rawdb.WriteTxIndexTail(indexer.db, indexer.cutoff)
|
|
rawdb.DeleteAllTxLookupEntries(indexer.db, func(txhash common.Hash, blob []byte) bool {
|
|
n := rawdb.DecodeTxLookupEntry(blob, indexer.db)
|
|
return n != nil && *n < indexer.cutoff
|
|
})
|
|
log.Warn("Purge transaction indexes below cutoff", "tail", *tail, "cutoff", indexer.cutoff)
|
|
}
|
|
}
|
|
|
|
// resolveHead resolves the block number of the current chain head.
|
|
func (indexer *txIndexer) resolveHead() uint64 {
|
|
headBlockHash := rawdb.ReadHeadBlockHash(indexer.db)
|
|
if headBlockHash == (common.Hash{}) {
|
|
return 0
|
|
}
|
|
headBlockNumber, ok := rawdb.ReadHeaderNumber(indexer.db, headBlockHash)
|
|
if !ok {
|
|
return 0
|
|
}
|
|
return headBlockNumber
|
|
}
|
|
|
|
// loop is the scheduler of the indexer, assigning indexing/unindexing tasks depending
|
|
// on the received chain event.
|
|
func (indexer *txIndexer) loop(chain *BlockChain) {
|
|
defer close(indexer.closed)
|
|
|
|
// Listening to chain events and manipulate the transaction indexes.
|
|
var (
|
|
stop chan struct{} // Non-nil if background routine is active
|
|
done chan struct{} // Non-nil if background routine is active
|
|
headCh = make(chan ChainHeadEvent)
|
|
sub = chain.SubscribeChainHeadEvent(headCh)
|
|
)
|
|
defer sub.Unsubscribe()
|
|
|
|
// Validate the transaction indexes and repair if necessary
|
|
head := indexer.head.Load()
|
|
indexer.repair(head)
|
|
|
|
// Launch the initial processing if chain is not empty (head != genesis).
|
|
// This step is useful in these scenarios that chain has no progress.
|
|
if head != 0 {
|
|
stop = make(chan struct{})
|
|
done = make(chan struct{})
|
|
go indexer.run(head, stop, done)
|
|
}
|
|
for {
|
|
select {
|
|
case h := <-headCh:
|
|
indexer.head.Store(h.Header.Number.Uint64())
|
|
if done == nil {
|
|
stop = make(chan struct{})
|
|
done = make(chan struct{})
|
|
go indexer.run(h.Header.Number.Uint64(), stop, done)
|
|
}
|
|
|
|
case <-done:
|
|
stop = nil
|
|
done = nil
|
|
indexer.tail.Store(rawdb.ReadTxIndexTail(indexer.db))
|
|
|
|
case ch := <-indexer.term:
|
|
if stop != nil {
|
|
close(stop)
|
|
}
|
|
if done != nil {
|
|
log.Info("Waiting background transaction indexer to exit")
|
|
<-done
|
|
}
|
|
close(ch)
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// report returns the tx indexing progress.
|
|
func (indexer *txIndexer) report(head uint64, tail *uint64) TxIndexProgress {
|
|
// Special case if the head is even below the cutoff,
|
|
// nothing to index.
|
|
if head < indexer.cutoff {
|
|
return TxIndexProgress{
|
|
Indexed: 0,
|
|
Remaining: 0,
|
|
}
|
|
}
|
|
// Compute how many blocks are supposed to be indexed
|
|
total := indexer.limit
|
|
if indexer.limit == 0 || total > head {
|
|
total = head + 1 // genesis included
|
|
}
|
|
length := head - indexer.cutoff + 1 // all available chain for indexing
|
|
if total > length {
|
|
total = length
|
|
}
|
|
// Compute how many blocks have been indexed
|
|
var indexed uint64
|
|
if tail != nil {
|
|
indexed = head - *tail + 1
|
|
}
|
|
// The value of indexed might be larger than total if some blocks need
|
|
// to be unindexed, avoiding a negative remaining.
|
|
var remaining uint64
|
|
if indexed < total {
|
|
remaining = total - indexed
|
|
}
|
|
return TxIndexProgress{
|
|
Indexed: indexed,
|
|
Remaining: remaining,
|
|
}
|
|
}
|
|
|
|
// txIndexProgress retrieves the transaction indexing progress. The reported
|
|
// progress may slightly lag behind the actual indexing state, as the tail is
|
|
// only updated at the end of each indexing operation. However, this delay is
|
|
// considered acceptable.
|
|
func (indexer *txIndexer) txIndexProgress() TxIndexProgress {
|
|
return indexer.report(indexer.head.Load(), indexer.tail.Load())
|
|
}
|
|
|
|
// close shutdown the indexer. Safe to be called for multiple times.
|
|
func (indexer *txIndexer) close() {
|
|
ch := make(chan struct{})
|
|
select {
|
|
case indexer.term <- ch:
|
|
<-ch
|
|
case <-indexer.closed:
|
|
}
|
|
}
|