mirror of
https://github.com/ethereum/go-ethereum.git
synced 2026-03-22 02:48:08 +00:00
This PR improves the pbss archive mode. Initial sync of an archive node, i.e. a node started with the --gcmode archive flag, will be significantly sped up. It achieves that with the following changes: The indexer now attempts to process histories in batches whenever possible. Batch indexing is enforced when the node is still syncing and the local chain head is behind the network chain head. In this scenario, instead of scheduling indexing frequently alongside block insertion, the indexer waits until a sufficient amount of history has accumulated and then processes it in a batch, which is significantly more efficient. --------- Co-authored-by: Sina M <1591639+s1na@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
162 lines
6.6 KiB
Go
162 lines
6.6 KiB
Go
// Copyright 2025 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
package pathdb
|
|
|
|
import (
|
|
"fmt"
|
|
|
|
"github.com/ethereum/go-ethereum/common"
|
|
"github.com/ethereum/go-ethereum/log"
|
|
"github.com/ethereum/go-ethereum/params"
|
|
)
|
|
|
|
const (
	// defaultTrieCleanSize is the default memory allowance (16 MiB) of the
	// clean trie cache.
	defaultTrieCleanSize = 16 * 1024 * 1024

	// defaultStateCleanSize is the default memory allowance (16 MiB) of the
	// clean state cache.
	defaultStateCleanSize = 16 * 1024 * 1024

	// maxBufferSize is the maximum memory allowance (256 MiB) of the node
	// buffer. A buffer that is too large will cause the system to pause for
	// a long time when a write happens. Also, the largest batch that pebble
	// can support is 4GB; the node will panic if the batch size exceeds this
	// limit.
	maxBufferSize = 256 * 1024 * 1024

	// defaultBufferSize is the default memory allowance (64 MiB) of the node
	// buffer that aggregates the writes from above until it's flushed into
	// the disk. It's meant to be used once the initial sync is finished.
	// Do not increase the buffer size arbitrarily, otherwise the system
	// pause time will increase when the database writes happen.
	defaultBufferSize = 64 * 1024 * 1024

	// maxFullValueCheckpoint defines the maximum allowed encoding frequency (1/16)
	// for storing nodes in full format. With this setting, a node may be written
	// to the trienode history as a full value at the specified frequency.
	//
	// Note that the frequency is not strict: the actual decision is probabilistic.
	// Only the overall long-term full-value encoding rate is enforced.
	//
	// Values beyond this limit are considered ineffective, as the trienode history
	// is already well compressed. Increasing it further will only degrade read
	// performance linearly.
	maxFullValueCheckpoint = 16

	// defaultFullValueCheckpoint defines the default full-value encoding frequency
	// (1/8) for storing nodes in full format. With this setting, nodes may be
	// written to the trienode history as full values at the specified rate.
	//
	// This strikes a balance between effective compression of the trienode history
	// and acceptable read performance.
	defaultFullValueCheckpoint = 8
)
|
|
|
|
var (
	// maxDiffLayers is the maximum diff layers allowed in the layer tree.
	//
	// NOTE(review): declared as a variable rather than a constant, presumably
	// so that it can be overridden (e.g. by tests) — confirm against callers.
	maxDiffLayers = 128
)
|
|
|
|
// Defaults contains default settings for Ethereum mainnet.
|
|
var Defaults = &Config{
|
|
StateHistory: params.FullImmutabilityThreshold,
|
|
TrienodeHistory: -1,
|
|
FullValueCheckpoint: defaultFullValueCheckpoint,
|
|
EnableStateIndexing: false,
|
|
TrieCleanSize: defaultTrieCleanSize,
|
|
StateCleanSize: defaultStateCleanSize,
|
|
WriteBufferSize: defaultBufferSize,
|
|
}
|
|
|
|
// ReadOnly is the config in order to open database in read only mode.
|
|
var ReadOnly = &Config{
|
|
ReadOnly: true,
|
|
TrienodeHistory: -1,
|
|
TrieCleanSize: defaultTrieCleanSize,
|
|
StateCleanSize: defaultStateCleanSize,
|
|
FullValueCheckpoint: defaultFullValueCheckpoint,
|
|
}
|
|
|
|
// Config contains the settings for database.
type Config struct {
	TrieCleanSize    int    // Maximum memory allowance (in bytes) for caching clean trie data
	StateCleanSize   int    // Maximum memory allowance (in bytes) for caching clean state data
	WriteBufferSize  int    // Maximum memory allowance (in bytes) for write buffer
	ReadOnly         bool   // Flag whether the database is opened in read only mode
	JournalDirectory string // Absolute path of journal directory (empty means the journal data is persisted in key-value store)

	// Historical state configurations
	StateHistory        uint64 // Number of recent blocks to maintain state history for, 0: full chain
	TrienodeHistory     int64  // Number of recent blocks to maintain trienode history for, 0: full chain, negative: disable
	EnableStateIndexing bool   // Whether to enable state history indexing for external state access
	FullValueCheckpoint uint32 // The rate at which trie nodes are encoded in full-value format

	// Testing configurations
	SnapshotNoBuild     bool // Flag whether the state generation is disabled
	NoAsyncFlush        bool // Flag whether the background buffer flushing is disabled
	NoAsyncGeneration   bool // Flag whether the background generation is disabled
	NoHistoryIndexDelay bool // Flag whether the history index delay is disabled
}
|
|
|
|
// sanitize checks the provided user configurations and changes anything that's
|
|
// unreasonable or unworkable.
|
|
func (c *Config) sanitize() *Config {
|
|
conf := *c
|
|
if conf.WriteBufferSize > maxBufferSize {
|
|
log.Warn("Sanitizing invalid node buffer size", "provided", common.StorageSize(conf.WriteBufferSize), "updated", common.StorageSize(maxBufferSize))
|
|
conf.WriteBufferSize = maxBufferSize
|
|
}
|
|
if conf.FullValueCheckpoint > maxFullValueCheckpoint {
|
|
log.Warn("Sanitizing trienode history full value checkpoint", "provided", conf.FullValueCheckpoint, "updated", maxFullValueCheckpoint)
|
|
conf.FullValueCheckpoint = maxFullValueCheckpoint
|
|
}
|
|
if conf.FullValueCheckpoint == 0 {
|
|
conf.FullValueCheckpoint = 1
|
|
log.Info("Disabling diff mode trie node history encoding")
|
|
}
|
|
return &conf
|
|
}
|
|
|
|
// fields returns a list of attributes of config for printing.
|
|
func (c *Config) fields() []interface{} {
|
|
var list []interface{}
|
|
if c.ReadOnly {
|
|
list = append(list, "readonly", true)
|
|
}
|
|
list = append(list, "triecache", common.StorageSize(c.TrieCleanSize))
|
|
list = append(list, "statecache", common.StorageSize(c.StateCleanSize))
|
|
list = append(list, "buffer", common.StorageSize(c.WriteBufferSize))
|
|
|
|
if c.StateHistory == 0 {
|
|
list = append(list, "state-history", "entire chain")
|
|
} else {
|
|
list = append(list, "state-history", fmt.Sprintf("last %d blocks", c.StateHistory))
|
|
}
|
|
if c.TrienodeHistory >= 0 {
|
|
if c.TrienodeHistory == 0 {
|
|
list = append(list, "trie-history", "entire chain")
|
|
} else {
|
|
list = append(list, "trie-history", fmt.Sprintf("last %d blocks", c.TrienodeHistory))
|
|
}
|
|
}
|
|
if c.EnableStateIndexing {
|
|
list = append(list, "index-history", true)
|
|
}
|
|
if c.JournalDirectory != "" {
|
|
list = append(list, "journal-dir", c.JournalDirectory)
|
|
}
|
|
return list
|
|
}
|