core, eth, trie: rework preimage store #25287 (#1099)

* core, trie, eth, cmd: rework preimage store

* trie: address comment

Co-authored-by: rjl493456442 <garyrong0905@gmail.com>
This commit is contained in:
Daniel Liu 2025-08-29 05:22:44 +08:00 committed by GitHub
parent db50cdd311
commit 3fed9ebeb8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 194 additions and 135 deletions

View file

@ -147,7 +147,7 @@ func (t *XDCXTrie) GetKey(shaKey []byte) []byte {
if key, ok := t.getSecKeyCache()[string(shaKey)]; ok {
return key
}
return t.trie.Db.Preimage(common.BytesToHash(shaKey))
return t.trie.Db().Preimage(common.BytesToHash(shaKey))
}
// Commit writes all nodes and the secure hash pre-images to the trie's database.
@ -158,12 +158,7 @@ func (t *XDCXTrie) GetKey(shaKey []byte) []byte {
func (t *XDCXTrie) Commit(onleaf trie.LeafCallback) (common.Hash, error) {
// Write all the pre-images to the actual disk database
if len(t.getSecKeyCache()) > 0 {
t.trie.Db.Lock.Lock()
for hk, key := range t.secKeyCache {
t.trie.Db.InsertPreimage(common.BytesToHash([]byte(hk)), key)
}
t.trie.Db.Lock.Unlock()
t.trie.Db().InsertPreimage(t.secKeyCache)
t.secKeyCache = make(map[string][]byte)
}
// Commit the trie to its intermediate node database

View file

@ -143,7 +143,7 @@ func (t *XDCXTrie) GetKey(shaKey []byte) []byte {
if key, ok := t.getSecKeyCache()[string(shaKey)]; ok {
return key
}
return t.trie.Db.Preimage(common.BytesToHash(shaKey))
return t.trie.Db().Preimage(common.BytesToHash(shaKey))
}
// Commit writes all nodes and the secure hash pre-images to the trie's database.
@ -154,12 +154,7 @@ func (t *XDCXTrie) GetKey(shaKey []byte) []byte {
func (t *XDCXTrie) Commit(onleaf trie.LeafCallback) (common.Hash, error) {
// Write all the pre-images to the actual disk database
if len(t.getSecKeyCache()) > 0 {
t.trie.Db.Lock.Lock()
for hk, key := range t.secKeyCache {
t.trie.Db.InsertPreimage(common.BytesToHash([]byte(hk)), key)
}
t.trie.Db.Lock.Unlock()
t.trie.Db().InsertPreimage(t.secKeyCache)
t.secKeyCache = make(map[string][]byte)
}
// Commit the trie to its intermediate node database

View file

@ -26,6 +26,7 @@ import (
"github.com/XinFinOrg/XDPoSChain/core/types"
"github.com/XinFinOrg/XDPoSChain/crypto"
"github.com/XinFinOrg/XDPoSChain/ethdb"
"github.com/XinFinOrg/XDPoSChain/trie"
)
type stateTest struct {
@ -40,7 +41,9 @@ func newStateTest() *stateTest {
}
func TestDump(t *testing.T) {
s := newStateTest()
db := rawdb.NewMemoryDatabase()
sdb, _ := New(common.Hash{}, NewDatabaseWithConfig(db, &trie.Config{Preimages: true}))
s := &stateTest{db: db, state: sdb}
// generate a few entries
obj1 := s.state.GetOrNewStateObject(common.BytesToAddress([]byte{0x01}))

View file

@ -24,12 +24,12 @@ import (
"sort"
"testing"
"github.com/XinFinOrg/XDPoSChain/common"
"github.com/XinFinOrg/XDPoSChain/core/rawdb"
"github.com/XinFinOrg/XDPoSChain/core/state"
"github.com/XinFinOrg/XDPoSChain/core/types"
"github.com/XinFinOrg/XDPoSChain/crypto"
"github.com/XinFinOrg/XDPoSChain/common"
"github.com/XinFinOrg/XDPoSChain/core/state"
"github.com/XinFinOrg/XDPoSChain/trie"
"github.com/davecgh/go-spew/spew"
)
@ -66,7 +66,7 @@ func (h resultHash) Less(i, j int) bool { return bytes.Compare(h[i].Bytes(), h[j
func TestAccountRange(t *testing.T) {
var (
statedb = state.NewDatabase(rawdb.NewMemoryDatabase())
statedb = state.NewDatabaseWithConfig(rawdb.NewMemoryDatabase(), &trie.Config{Preimages: true})
state, _ = state.New(common.Hash{}, statedb)
addrs = [AccountRangeMaxResults * 2]common.Address{}
m = map[common.Address]bool{}

View file

@ -73,8 +73,6 @@ type Database struct {
oldest common.Hash // Oldest tracked Node, flush-list head
newest common.Hash // Newest tracked Node, flush-list tail
preimages map[common.Hash][]byte // Preimages of nodes from the secure trie
gctime time.Duration // Time spent on garbage collection since last commit
gcnodes uint64 // Nodes garbage collected since last commit
gcsize common.StorageSize // Data storage garbage collected since last commit
@ -83,11 +81,11 @@ type Database struct {
flushnodes uint64 // Nodes flushed since last commit
flushsize common.StorageSize // Data storage flushed since last commit
dirtiesSize common.StorageSize // Storage size of the dirty Node Cache (exc. metadata)
childrenSize common.StorageSize // Storage size of the external children tracking
preimagesSize common.StorageSize // Storage size of the preimages Cache
dirtiesSize common.StorageSize // Storage size of the dirty node cache (exc. metadata)
childrenSize common.StorageSize // Storage size of the external children tracking
preimages *preimageStore // The store for caching preimages
Lock sync.RWMutex
lock sync.RWMutex
}
// rawNode is a simple binary blob used to differentiate between collapsed trie
@ -281,15 +279,17 @@ func NewDatabaseWithConfig(diskdb ethdb.KeyValueStore, config *Config) *Database
if config != nil && config.Cache > 0 {
cleans = fastcache.New(config.Cache * 1024 * 1024)
}
var preimage *preimageStore
if config != nil && config.Preimages {
preimage = newPreimageStore(diskdb)
}
db := &Database{
diskdb: diskdb,
cleans: cleans,
dirties: map[common.Hash]*cachedNode{{}: {
children: make(map[common.Hash]uint16),
}},
}
if config == nil || config.Preimages { // TODO(karalabe): Flip to default off in the future
db.preimages = make(map[common.Hash][]byte)
preimages: preimage,
}
return db
}
@ -299,13 +299,34 @@ func (db *Database) DiskDB() ethdb.KeyValueStore {
return db.diskdb
}
// Preimage looks up the pre-image recorded for the given hash. The lookup is
// delegated to the preimage store, which consults its in-memory cache first
// and falls back to the persistent database. When preimage recording is
// disabled (no store configured), nil is returned.
func (db *Database) Preimage(hash common.Hash) []byte {
	if store := db.preimages; store != nil {
		return store.preimage(hash)
	}
	return nil
}
// InsertPreimage records the entries of a secure-trie key cache in the
// preimage store.
//
// secKeyCache maps the raw bytes of a hashed key (held as a string) to the
// original key. Every entry is re-keyed by common.Hash before being handed to
// the store. The value slices are passed along as-is, without copying.
//
// The call is a no-op when preimage recording is disabled, i.e. when the
// database has no preimage store.
func (db *Database) InsertPreimage(secKeyCache map[string][]byte) {
	if db.preimages == nil {
		return
	}
	// The converted map holds at most one entry per cache entry, so size it
	// up front instead of letting it grow while it is being filled.
	preimages := make(map[common.Hash][]byte, len(secKeyCache))
	for hk, key := range secKeyCache {
		preimages[common.BytesToHash([]byte(hk))] = key
	}
	db.preimages.insertPreimage(preimages)
}
// insert inserts a collapsed trie node into the memory database.
// The blob size must be specified to allow proper size tracking.
// All nodes inserted by this function will be reference tracked
// and in theory should only used for **trie nodes** insertion.
func (db *Database) insert(hash common.Hash, size int, node node) {
db.Lock.Lock()
defer db.Lock.Unlock()
db.lock.Lock()
defer db.lock.Unlock()
// If the node's already cached, skip
if _, ok := db.dirties[hash]; ok {
@ -335,26 +356,8 @@ func (db *Database) insert(hash common.Hash, size int, node node) {
db.dirtiesSize += common.StorageSize(common.HashLength + entry.size)
}
// insertPreimage writes a new trie node pre-image to the memory database if it's
// yet unknown. The method will NOT make a copy of the slice,
// only use if the preimage will NOT be changed later on.
//
// Note, this method assumes that the database's Lock is held!
func (db *Database) InsertPreimage(hash common.Hash, preimage []byte) {
// Short circuit if preimage collection is disabled
if db.preimages == nil {
return
}
// Track the preimage if a yet unknown one
if _, ok := db.preimages[hash]; ok {
return
}
db.preimages[hash] = preimage
db.preimagesSize += common.StorageSize(common.HashLength + len(preimage))
}
// node retrieves a cached trie Node from memory, or returns nil if none can be
// found in the memory Cache.
// node retrieves a cached trie node from memory, or returns nil if none can be
// found in the memory cache.
func (db *Database) node(hash common.Hash) node {
// Retrieve the Node from the clean Cache if available
if db.cleans != nil {
@ -365,9 +368,9 @@ func (db *Database) node(hash common.Hash) node {
}
}
// Retrieve the Node from the dirty Cache if available
db.Lock.RLock()
db.lock.RLock()
dirty := db.dirties[hash]
db.Lock.RUnlock()
db.lock.RUnlock()
if dirty != nil {
memcacheDirtyHitMeter.Mark(1)
@ -405,9 +408,9 @@ func (db *Database) Node(hash common.Hash) ([]byte, error) {
}
}
// Retrieve the Node from the dirty Cache if available
db.Lock.RLock()
db.lock.RLock()
dirty := db.dirties[hash]
db.Lock.RUnlock()
db.lock.RUnlock()
if dirty != nil {
memcacheDirtyHitMeter.Mark(1)
@ -429,30 +432,12 @@ func (db *Database) Node(hash common.Hash) ([]byte, error) {
return nil, errors.New("not found")
}
// Preimage retrieves a cached trie Node pre-image from memory. If it cannot be
// found cached, the method queries the persistent database for the content.
func (db *Database) Preimage(hash common.Hash) []byte {
// Short circuit if preimage collection is disabled
if db.preimages == nil {
return nil
}
// Retrieve the Node from Cache if available
db.Lock.RLock()
preimage := db.preimages[hash]
db.Lock.RUnlock()
if preimage != nil {
return preimage
}
return rawdb.ReadPreimage(db.diskdb, hash)
}
// Nodes retrieves the hashes of all the nodes cached within the memory database.
// This method is extremely expensive and should only be used to validate internal
// states in test code.
func (db *Database) Nodes() []common.Hash {
db.Lock.RLock()
defer db.Lock.RUnlock()
db.lock.RLock()
defer db.lock.RUnlock()
var hashes = make([]common.Hash, 0, len(db.dirties))
for hash := range db.dirties {
@ -468,8 +453,8 @@ func (db *Database) Nodes() []common.Hash {
// and external node(e.g. storage trie root), all internal trie nodes
// are referenced together by database itself.
func (db *Database) Reference(child common.Hash, parent common.Hash) {
db.Lock.Lock()
defer db.Lock.Unlock()
db.lock.Lock()
defer db.lock.Unlock()
db.reference(child, parent)
}
@ -502,8 +487,8 @@ func (db *Database) Dereference(root common.Hash) {
log.Error("Attempted to dereference the trie Cache meta root")
return
}
db.Lock.Lock()
defer db.Lock.Unlock()
db.lock.Lock()
defer db.lock.Unlock()
nodes, storage, start := len(db.dirties), db.dirtiesSize, time.Now()
db.dereference(root, common.Hash{})
@ -591,19 +576,8 @@ func (db *Database) Cap(limit common.StorageSize) error {
// If the Preimage Cache got large enough, push to disk. If it's still small
// leave for later to deduplicate writes.
flushPreimages := db.preimagesSize > 4*1024*1024
if flushPreimages {
if db.preimages == nil {
log.Error("Attempted to write preimages whilst disabled")
} else {
rawdb.WritePreimages(batch, db.preimages)
if batch.ValueSize() > ethdb.IdealBatchSize {
if err := batch.Write(); err != nil {
return err
}
batch.Reset()
}
}
if db.preimages != nil {
db.preimages.commit(false)
}
// Keep committing nodes from the flush-list until we're below allowance
oldest := db.oldest
@ -635,16 +609,9 @@ func (db *Database) Cap(limit common.StorageSize) error {
return err
}
// Write successful, clear out the flushed data
db.Lock.Lock()
defer db.Lock.Unlock()
db.lock.Lock()
defer db.lock.Unlock()
if flushPreimages {
if db.preimages == nil {
log.Error("Attempted to reset preimage cache whilst disabled")
} else {
db.preimages, db.preimagesSize = make(map[common.Hash][]byte), 0
}
}
for db.oldest != oldest {
node := db.dirties[db.oldest]
delete(db.dirties, db.oldest)
@ -688,13 +655,7 @@ func (db *Database) Commit(node common.Hash, report bool) error {
// Move all of the accumulated preimages into a write batch
if db.preimages != nil {
rawdb.WritePreimages(batch, db.preimages)
// Since we're going to replay trie Node writes into the clean Cache, flush out
// any batched pre-images before continuing.
if err := batch.Write(); err != nil {
return err
}
batch.Reset()
db.preimages.commit(true)
}
// Move the trie itself into the batch, flushing if enough data is accumulated
nodes, storage := len(db.dirties), db.dirtiesSize
@ -710,16 +671,13 @@ func (db *Database) Commit(node common.Hash, report bool) error {
return err
}
// Uncache any leftovers in the last batch
db.Lock.Lock()
defer db.Lock.Unlock()
db.lock.Lock()
defer db.lock.Unlock()
batch.Replay(uncacher)
batch.Reset()
// Reset the storage counters and bumpd metrics
if db.preimages != nil {
db.preimages, db.preimagesSize = make(map[common.Hash][]byte), 0
}
// Reset the storage counters and bumped metrics
memcacheCommitTimeTimer.Update(time.Since(start))
memcacheCommitSizeMeter.Mark(int64(storage - db.dirtiesSize))
memcacheCommitNodesMeter.Mark(int64(nodes - len(db.dirties)))
@ -760,10 +718,10 @@ func (db *Database) commit(hash common.Hash, batch ethdb.Batch, uncacher *cleane
if err := batch.Write(); err != nil {
return err
}
db.Lock.Lock()
db.lock.Lock()
batch.Replay(uncacher)
batch.Reset()
db.Lock.Unlock()
db.lock.Unlock()
}
return nil
}
@ -820,13 +778,17 @@ func (c *cleaner) Delete(key []byte) error {
// Size returns the current storage size of the memory Cache in front of the
// persistent database layer.
func (db *Database) Size() (common.StorageSize, common.StorageSize) {
db.Lock.RLock()
defer db.Lock.RUnlock()
db.lock.RLock()
defer db.lock.RUnlock()
// Db.dirtiesSize only contains the useful data in the Cache, but when reporting
// the total memory consumption, the maintenance metadata is also needed to be
// counted.
var metadataSize = common.StorageSize((len(db.dirties) - 1) * cachedNodeSize)
var metarootRefs = common.StorageSize(len(db.dirties[common.Hash{}].children) * (common.HashLength + 2))
return db.dirtiesSize + db.childrenSize + metadataSize - metarootRefs, db.preimagesSize
var preimageSize common.StorageSize
if db.preimages != nil {
preimageSize = db.preimages.size()
}
return db.dirtiesSize + db.childrenSize + metadataSize - metarootRefs, preimageSize
}

95
trie/preimages.go Normal file
View file

@ -0,0 +1,95 @@
// Copyright 2022 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package trie
import (
"sync"
"github.com/XinFinOrg/XDPoSChain/common"
"github.com/XinFinOrg/XDPoSChain/core/rawdb"
"github.com/XinFinOrg/XDPoSChain/ethdb"
)
// preimageStore is the store for caching preimages of node key.
//
// Entries are buffered in memory by insertPreimage and written to disk by
// commit. The methods of this type take lock around every access to
// preimages and preimagesSize.
type preimageStore struct {
lock sync.RWMutex // Guards preimages and preimagesSize
disk ethdb.KeyValueStore // Persistent store: read on a cache miss, written by commit
preimages map[common.Hash][]byte // Preimages of nodes from the secure trie
preimagesSize common.StorageSize // Storage size of the preimages cache (hash length + preimage length per entry)
}
// newPreimageStore builds an empty preimage cache that sits in front of the
// given key-value store.
func newPreimageStore(disk ethdb.KeyValueStore) *preimageStore {
	store := new(preimageStore)
	store.disk = disk
	store.preimages = make(map[common.Hash][]byte)
	return store
}
// insertPreimage adds a batch of pre-images to the in-memory cache. Hashes
// that are already cached are left untouched, and the tracked cache size only
// grows for entries that are actually added.
//
// The method will NOT make a copy of the slices, only use it if the preimages
// will NOT be changed later on.
func (store *preimageStore) insertPreimage(preimages map[common.Hash][]byte) {
	store.lock.Lock()
	defer store.lock.Unlock()

	for hash, blob := range preimages {
		if _, known := store.preimages[hash]; !known {
			store.preimages[hash] = blob
			store.preimagesSize += common.StorageSize(common.HashLength + len(blob))
		}
	}
}
// preimage returns the pre-image for the given hash. The in-memory cache is
// consulted first (under the read lock); if it holds nothing for the hash,
// the persistent database is queried instead.
func (store *preimageStore) preimage(hash common.Hash) []byte {
	store.lock.RLock()
	cached := store.preimages[hash]
	store.lock.RUnlock()

	if cached == nil {
		// Cache miss: fall back to disk, outside of the lock.
		return rawdb.ReadPreimage(store.disk, hash)
	}
	return cached
}
// commit writes the cached preimages to disk and then empties the cache.
//
// Unless force is set, nothing is written while the cache size is at or below
// 4 MiB. If the batch write fails, the error is returned and the cache is
// left as it was.
func (store *preimageStore) commit(force bool) error {
	store.lock.Lock()
	defer store.lock.Unlock()

	if !force && store.preimagesSize <= 4*1024*1024 {
		return nil
	}
	batch := store.disk.NewBatch()
	rawdb.WritePreimages(batch, store.preimages)
	err := batch.Write()
	if err != nil {
		return err
	}
	// Write succeeded: start over with a fresh, empty cache.
	store.preimages = make(map[common.Hash][]byte)
	store.preimagesSize = 0
	return nil
}
// size reports the storage size of the preimages currently held in the
// in-memory cache.
func (store *preimageStore) size() common.StorageSize {
	store.lock.RLock()
	total := store.preimagesSize
	store.lock.RUnlock()
	return total
}

View file

@ -37,6 +37,7 @@ import (
// SecureTrie is not safe for concurrent use.
type SecureTrie struct {
trie Trie
preimages *preimageStore
hashKeyBuf [common.HashLength]byte
secKeyCache map[string][]byte
secKeyCacheOwner *SecureTrie // Pointer to self, replace the key Cache on mismatch
@ -61,7 +62,7 @@ func NewSecure(owner common.Hash, root common.Hash, db *Database) (*SecureTrie,
if err != nil {
return nil, err
}
return &SecureTrie{trie: *trie}, nil
return &SecureTrie{trie: *trie, preimages: db.preimages}, nil
}
// Get returns the value for key stored in the trie.
@ -153,7 +154,10 @@ func (t *SecureTrie) GetKey(shaKey []byte) []byte {
if key, ok := t.getSecKeyCache()[string(shaKey)]; ok {
return key
}
return t.trie.Db.Preimage(common.BytesToHash(shaKey))
if t.preimages == nil {
return nil
}
return t.preimages.preimage(common.BytesToHash(shaKey))
}
// Commit writes all nodes and the secure hash pre-images to the trie's database.
@ -164,12 +168,12 @@ func (t *SecureTrie) GetKey(shaKey []byte) []byte {
func (t *SecureTrie) Commit(onleaf LeafCallback) (common.Hash, int, error) {
// Write all the pre-images to the actual disk database
if len(t.getSecKeyCache()) > 0 {
if t.trie.Db.preimages != nil { // Ugly direct check but avoids the below write lock
t.trie.Db.Lock.Lock()
if t.preimages != nil {
preimages := make(map[common.Hash][]byte)
for hk, key := range t.secKeyCache {
t.trie.Db.InsertPreimage(common.BytesToHash([]byte(hk)), key)
preimages[common.BytesToHash([]byte(hk))] = key
}
t.trie.Db.Lock.Unlock()
t.preimages.insertPreimage(preimages)
}
t.secKeyCache = make(map[string][]byte)
}
@ -187,6 +191,7 @@ func (t *SecureTrie) Hash() common.Hash {
func (t *SecureTrie) Copy() *SecureTrie {
return &SecureTrie{
trie: *t.trie.Copy(),
// The preimage store pointer is shared with the original, not duplicated.
preimages: t.preimages,
// The key cache map is handed over as-is, so both tries reference the same map.
// NOTE(review): secKeyCacheOwner is left unset here; presumably the cache is
// replaced on first use because of the owner mismatch — confirm in getSecKeyCache.
secKeyCache: t.secKeyCache,
}
}

View file

@ -52,7 +52,7 @@ type LeafCallback func(keys [][]byte, path []byte, leaf []byte, parent common.Ha
//
// Trie is not safe for concurrent use.
type Trie struct {
Db *Database
db *Database
root node
owner common.Hash
@ -71,10 +71,14 @@ func (t *Trie) newFlag() nodeFlag {
return nodeFlag{dirty: true}
}
// Db returns the node database backing this trie. The db field itself is
// unexported, so this accessor is how code in other packages reaches it.
func (t *Trie) Db() *Database {
return t.db
}
// Copy returns a copy of Trie.
func (t *Trie) Copy() *Trie {
return &Trie{
Db: t.Db,
db: t.db,
root: t.root,
owner: t.owner,
unhashed: t.unhashed,
@ -105,7 +109,7 @@ func newWithRootNode(root node) *Trie {
return &Trie{
root: root,
//tracer: newTracer(),
Db: NewDatabase(rawdb.NewMemoryDatabase()),
db: NewDatabase(rawdb.NewMemoryDatabase()),
}
}
@ -115,7 +119,7 @@ func newTrie(owner common.Hash, root common.Hash, db *Database) (*Trie, error) {
panic("trie.New called without a database")
}
trie := &Trie{
Db: db,
db: db,
owner: owner,
//tracer: newTracer(),
}
@ -227,7 +231,7 @@ func (t *Trie) tryGetNode(origNode node, path []byte, pos int) (item []byte, new
if hash == nil {
return nil, origNode, 0, errors.New("non-consensus node")
}
blob, err := t.Db.Node(common.BytesToHash(hash))
blob, err := t.db.Node(common.BytesToHash(hash))
return blob, origNode, 1, err
}
// Path still needs to be traversed, descend into children
@ -729,7 +733,7 @@ func (t *Trie) resolve(n node, prefix []byte) (node, error) {
func (t *Trie) resolveHash(n hashNode, prefix []byte) (node, error) {
hash := common.BytesToHash(n)
if node := t.Db.node(hash); node != nil {
if node := t.db.node(hash); node != nil {
return node, nil
}
return nil, &MissingNodeError{Owner: t.owner, NodeHash: hash, Path: prefix}
@ -737,7 +741,7 @@ func (t *Trie) resolveHash(n hashNode, prefix []byte) (node, error) {
func (t *Trie) resolveBlob(n hashNode, prefix []byte) ([]byte, error) {
hash := common.BytesToHash(n)
blob, _ := t.Db.Node(hash)
blob, _ := t.db.Node(hash)
if len(blob) != 0 {
return blob, nil
}
@ -755,7 +759,7 @@ func (t *Trie) Hash() common.Hash {
// Commit writes all nodes to the trie's memory database, tracking the internal
// and external (for account tries) references.
func (t *Trie) Commit(onleaf LeafCallback) (common.Hash, int, error) {
if t.Db == nil {
if t.db == nil {
panic("commit called on trie with nil database")
}
defer t.tracer.reset()
@ -785,10 +789,10 @@ func (t *Trie) Commit(onleaf LeafCallback) (common.Hash, int, error) {
wg.Add(1)
go func() {
defer wg.Done()
h.commitLoop(t.Db)
h.commitLoop(t.db)
}()
}
newRoot, committed, err := h.Commit(t.root, t.Db)
newRoot, committed, err := h.Commit(t.root, t.db)
if onleaf != nil {
// The leafch is created in newCommitter if there was an onleaf callback
// provided. The commitLoop only _reads_ from it, and the commit

View file

@ -598,7 +598,7 @@ func TestTinyTrie(t *testing.T) {
if exp, root := common.HexToHash("0608c1d1dc3905fa22204c7a0e43644831c3b6d3def0f274be623a948197e64a"), trie.Hash(); exp != root {
t.Errorf("3: got %x, exp %x", root, exp)
}
checktr := NewEmpty(trie.Db)
checktr := NewEmpty(trie.Db())
it := NewIterator(trie.NodeIterator(nil))
for it.Next() {
checktr.Update(it.Key, it.Value)