core, trie: prepare for path-based trie storage #26603 (#1126)

This PR moves some trie-related db accessor methods to a different file, and also removes the schema type. Instead of the schema type, a string is used to distinguish between hash-based and path-based db accessors.
This also moves some code from trie package to rawdb package.

This PR is intended as a preparatory change, with no change in functionality, for #25963.

---------

Co-authored-by: Martin Holst Swende <martin@swende.se>
Co-authored-by: Gary Rong <garyrong0905@gmail.com>
This commit is contained in:
Daniel Liu 2025-11-18 13:50:00 +08:00 committed by GitHub
parent d47260ea98
commit 3b3aa9b013
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 294 additions and 139 deletions

View file

@ -92,29 +92,3 @@ func DeleteCode(db ethdb.KeyValueWriter, hash common.Hash) {
log.Crit("Failed to delete contract code", "err", err)
}
}
// ReadTrieNode fetches the trie node stored under the given hash.
//
// Any error reported by the database is deliberately discarded; whatever
// value the lookup produced is handed back unchanged.
func ReadTrieNode(db ethdb.KeyValueReader, hash common.Hash) []byte {
	blob, _ := db.Get(hash.Bytes())
	return blob
}
// HasTrieNode reports whether db holds an entry keyed by the given node hash.
//
// Any error reported by the database is deliberately discarded; only the
// presence flag of the lookup is returned.
func HasTrieNode(db ethdb.KeyValueReader, hash common.Hash) bool {
	found, _ := db.Has(hash.Bytes())
	return found
}
// WriteTrieNode stores the provided trie node in the database, keyed by its
// hash. A failed write is reported through log.Crit.
func WriteTrieNode(db ethdb.KeyValueWriter, hash common.Hash, node []byte) {
	err := db.Put(hash.Bytes(), node)
	if err == nil {
		return
	}
	log.Crit("Failed to store trie node", "err", err)
}
// DeleteTrieNode removes the trie node keyed by the given hash from the
// database. A failed delete is reported through log.Crit.
func DeleteTrieNode(db ethdb.KeyValueWriter, hash common.Hash) {
	err := db.Delete(hash.Bytes())
	if err == nil {
		return
	}
	log.Crit("Failed to delete trie node", "err", err)
}

View file

@ -0,0 +1,263 @@
// Copyright 2022 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>
package rawdb
import (
"fmt"
"sync"
"github.com/XinFinOrg/XDPoSChain/common"
"github.com/XinFinOrg/XDPoSChain/crypto"
"github.com/XinFinOrg/XDPoSChain/ethdb"
"github.com/XinFinOrg/XDPoSChain/log"
"golang.org/x/crypto/sha3"
)
// HashScheme is the legacy hash-based state scheme, in which trie nodes are
// stored on disk with the node hash as the database key. The advantage of this
// scheme is that different versions of a trie node can coexist on disk, which
// is very beneficial for constructing archive nodes. The drawback is that
// different trie nodes on the same path end up in different locations on disk
// with no data locality, which also makes state pruning hard to design.
//
// This scheme is still kept for backward compatibility, and it will be used
// for archive nodes and some other tries (e.g. the light trie).
const HashScheme = "hashScheme"
// PathScheme is the new path-based state scheme, in which trie nodes are stored
// on disk with the node path as the database key. This scheme only stores one
// version of the state data on disk, which means that state pruning is native
// to it. At the same time, this scheme places adjacent trie nodes in the same
// area of the disk, giving good data locality. The trade-off is that it needs
// to rely on extra state diffs to survive deep reorgs.
const PathScheme = "pathScheme"
// nodeHasher wraps a Keccak state that is used to derive the hash of a trie
// node blob.
type nodeHasher struct {
	sha crypto.KeccakState
}

// hasherPool recycles nodeHasher instances; a new legacy Keccak-256 state is
// created whenever the pool has nothing to hand out.
var hasherPool = sync.Pool{
	New: func() interface{} {
		state := sha3.NewLegacyKeccak256().(crypto.KeccakState)
		return &nodeHasher{sha: state}
	},
}

// newNodeHasher takes a hasher out of the pool.
func newNodeHasher() *nodeHasher {
	hasher := hasherPool.Get().(*nodeHasher)
	return hasher
}

// returnHasherToPool hands the hasher back to the pool.
func returnHasherToPool(h *nodeHasher) {
	hasherPool.Put(h)
}

// hashData resets the underlying state, feeds it data and reads the digest
// into the returned hash.
func (h *nodeHasher) hashData(data []byte) common.Hash {
	var digest common.Hash
	h.sha.Reset()
	h.sha.Write(data)
	h.sha.Read(digest[:])
	return digest
}
// ReadAccountTrieNode loads the account trie node stored at the given node
// path and returns it together with the hash computed over its content.
// If the database lookup fails, a nil blob and the zero hash are returned.
func ReadAccountTrieNode(db ethdb.KeyValueReader, path []byte) ([]byte, common.Hash) {
	key := accountTrieNodeKey(path)
	blob, err := db.Get(key)
	if err != nil {
		return nil, common.Hash{}
	}
	h := newNodeHasher()
	defer returnHasherToPool(h)

	digest := h.hashData(blob)
	return blob, digest
}
// HasAccountTrieNode reports whether the account trie node stored at the given
// node path hashes to the expected value. A failed database lookup yields
// false.
func HasAccountTrieNode(db ethdb.KeyValueReader, path []byte, hash common.Hash) bool {
	key := accountTrieNodeKey(path)
	blob, err := db.Get(key)
	if err != nil {
		return false
	}
	h := newNodeHasher()
	defer returnHasherToPool(h)

	digest := h.hashData(blob)
	return digest == hash
}
// WriteAccountTrieNode stores the given account trie node under the key
// derived from its node path. A failed write is reported through log.Crit.
func WriteAccountTrieNode(db ethdb.KeyValueWriter, path []byte, node []byte) {
	key := accountTrieNodeKey(path)
	err := db.Put(key, node)
	if err == nil {
		return
	}
	log.Crit("Failed to store account trie node", "err", err)
}
// DeleteAccountTrieNode removes the account trie node stored under the key
// derived from the given node path. A failed delete is reported through
// log.Crit.
func DeleteAccountTrieNode(db ethdb.KeyValueWriter, path []byte) {
	key := accountTrieNodeKey(path)
	err := db.Delete(key)
	if err == nil {
		return
	}
	log.Crit("Failed to delete account trie node", "err", err)
}
// ReadStorageTrieNode loads the storage trie node stored for the given account
// hash and node path and returns it together with the hash computed over its
// content. If the database lookup fails, a nil blob and the zero hash are
// returned.
func ReadStorageTrieNode(db ethdb.KeyValueReader, accountHash common.Hash, path []byte) ([]byte, common.Hash) {
	key := storageTrieNodeKey(accountHash, path)
	blob, err := db.Get(key)
	if err != nil {
		return nil, common.Hash{}
	}
	h := newNodeHasher()
	defer returnHasherToPool(h)

	digest := h.hashData(blob)
	return blob, digest
}
// HasStorageTrieNode reports whether the storage trie node stored for the
// given account hash and node path hashes to the expected value. A failed
// database lookup yields false.
func HasStorageTrieNode(db ethdb.KeyValueReader, accountHash common.Hash, path []byte, hash common.Hash) bool {
	key := storageTrieNodeKey(accountHash, path)
	blob, err := db.Get(key)
	if err != nil {
		return false
	}
	h := newNodeHasher()
	defer returnHasherToPool(h)

	digest := h.hashData(blob)
	return digest == hash
}
// WriteStorageTrieNode stores the given storage trie node under the key
// derived from the account hash and node path. A failed write is reported
// through log.Crit.
func WriteStorageTrieNode(db ethdb.KeyValueWriter, accountHash common.Hash, path []byte, node []byte) {
	key := storageTrieNodeKey(accountHash, path)
	err := db.Put(key, node)
	if err == nil {
		return
	}
	log.Crit("Failed to store storage trie node", "err", err)
}
// DeleteStorageTrieNode removes the storage trie node stored under the key
// derived from the account hash and node path. A failed delete is reported
// through log.Crit.
func DeleteStorageTrieNode(db ethdb.KeyValueWriter, accountHash common.Hash, path []byte) {
	key := storageTrieNodeKey(accountHash, path)
	err := db.Delete(key)
	if err == nil {
		return
	}
	log.Crit("Failed to delete storage trie node", "err", err)
}
// ReadLegacyTrieNode loads the legacy (hash-keyed) trie node stored under the
// given node hash. It returns nil when the database lookup fails.
func ReadLegacyTrieNode(db ethdb.KeyValueReader, hash common.Hash) []byte {
	if blob, err := db.Get(hash.Bytes()); err == nil {
		return blob
	}
	return nil
}
// HasLegacyTrieNode reports whether db holds an entry keyed by the given node
// hash.
//
// Any error reported by the database is deliberately discarded; only the
// presence flag of the lookup is returned.
func HasLegacyTrieNode(db ethdb.KeyValueReader, hash common.Hash) bool {
	found, _ := db.Has(hash.Bytes())
	return found
}
// WriteLegacyTrieNode stores the given legacy trie node in the database, keyed
// by its node hash. A failed write is reported through log.Crit.
func WriteLegacyTrieNode(db ethdb.KeyValueWriter, hash common.Hash, node []byte) {
	err := db.Put(hash.Bytes(), node)
	if err == nil {
		return
	}
	log.Crit("Failed to store legacy trie node", "err", err)
}
// DeleteLegacyTrieNode removes the legacy trie node keyed by the given node
// hash from the database. A failed delete is reported through log.Crit.
func DeleteLegacyTrieNode(db ethdb.KeyValueWriter, hash common.Hash) {
	err := db.Delete(hash.Bytes())
	if err == nil {
		return
	}
	log.Crit("Failed to delete legacy trie node", "err", err)
}
// HasTrieNode reports whether the described trie node is present in the
// database, dispatching on the given scheme:
//   - HashScheme: only hash is consulted.
//   - PathScheme: the node is located by owner and path and its content must
//     hash to hash; the zero owner selects the account trie accessor, any
//     other owner the storage trie accessor.
//
// It panics if scheme is neither of the two.
func HasTrieNode(db ethdb.KeyValueReader, owner common.Hash, path []byte, hash common.Hash, scheme string) bool {
	if scheme == HashScheme {
		return HasLegacyTrieNode(db, hash)
	}
	if scheme != PathScheme {
		panic(fmt.Sprintf("Unknown scheme %v", scheme))
	}
	if owner != (common.Hash{}) {
		return HasStorageTrieNode(db, owner, path, hash)
	}
	return HasAccountTrieNode(db, path, hash)
}
// ReadTrieNode loads the described trie node from the database, dispatching on
// the given scheme:
//   - HashScheme: the node is looked up by hash alone.
//   - PathScheme: the node is looked up by owner and path (the zero owner
//     selects the account trie accessor) and is only returned if the hash
//     computed over its content equals hash; otherwise nil is returned.
//
// It panics if scheme is neither of the two.
func ReadTrieNode(db ethdb.KeyValueReader, owner common.Hash, path []byte, hash common.Hash, scheme string) []byte {
	if scheme == HashScheme {
		return ReadLegacyTrieNode(db, hash)
	}
	if scheme != PathScheme {
		panic(fmt.Sprintf("Unknown scheme %v", scheme))
	}
	var (
		node   []byte
		digest common.Hash
	)
	if owner != (common.Hash{}) {
		node, digest = ReadStorageTrieNode(db, owner, path)
	} else {
		node, digest = ReadAccountTrieNode(db, path)
	}
	if digest == hash {
		return node
	}
	return nil
}
// WriteTrieNode stores the given trie node in the database, dispatching on the
// given scheme:
//   - HashScheme: the node is written under hash; owner and path are unused.
//   - PathScheme: the node is written under owner and path (the zero owner
//     selects the account trie accessor); hash is unused.
//
// It panics if scheme is neither of the two.
func WriteTrieNode(db ethdb.KeyValueWriter, owner common.Hash, path []byte, hash common.Hash, node []byte, scheme string) {
	if scheme == HashScheme {
		WriteLegacyTrieNode(db, hash, node)
		return
	}
	if scheme != PathScheme {
		panic(fmt.Sprintf("Unknown scheme %v", scheme))
	}
	if owner != (common.Hash{}) {
		WriteStorageTrieNode(db, owner, path, node)
		return
	}
	WriteAccountTrieNode(db, path, node)
}
// DeleteTrieNode removes the described trie node from the database,
// dispatching on the given scheme:
//   - HashScheme: the node keyed by hash is deleted; owner and path are
//     unused.
//   - PathScheme: the node keyed by owner and path is deleted (the zero owner
//     selects the account trie accessor); hash is unused.
//
// It panics if scheme is neither of the two.
func DeleteTrieNode(db ethdb.KeyValueWriter, owner common.Hash, path []byte, hash common.Hash, scheme string) {
	if scheme == HashScheme {
		DeleteLegacyTrieNode(db, hash)
		return
	}
	if scheme != PathScheme {
		panic(fmt.Sprintf("Unknown scheme %v", scheme))
	}
	if owner != (common.Hash{}) {
		DeleteStorageTrieNode(db, owner, path)
		return
	}
	DeleteAccountTrieNode(db, path)
}

View file

@ -64,6 +64,10 @@ var (
// used by old db, now only used for conversion
oldReceiptsPrefix = []byte("receipts-")
// Path-based trie node scheme.
trieNodeAccountPrefix = []byte("A") // trieNodeAccountPrefix + hexPath -> trie node
trieNodeStoragePrefix = []byte("O") // trieNodeStoragePrefix + accountHash + hexPath -> trie node
preimagePrefix = []byte("secure-key-") // preimagePrefix + hash -> preimage
configPrefix = []byte("ethereum-config-") // config prefix for the db
genesisPrefix = []byte("ethereum-genesis-") // genesis state prefix for the db
@ -188,3 +192,13 @@ func configKey(hash common.Hash) []byte {
// genesisStateSpecKey = genesisPrefix + hash.
//
// The key is assembled in a freshly allocated, exactly sized buffer instead of
// appending onto genesisPrefix itself. Appending directly onto the shared
// package-level slice would write into its backing array whenever that slice
// has spare capacity, so keys handed out earlier could be overwritten.
func genesisStateSpecKey(hash common.Hash) []byte {
	suffix := hash.Bytes()
	key := make([]byte, 0, len(genesisPrefix)+len(suffix))
	key = append(key, genesisPrefix...)
	return append(key, suffix...)
}
// accountTrieNodeKey = trieNodeAccountPrefix + nodePath.
//
// The key is assembled in a freshly allocated, exactly sized buffer instead of
// appending onto trieNodeAccountPrefix itself. Appending directly onto the
// shared package-level slice would write into its backing array whenever that
// slice has spare capacity, so keys handed out earlier (or concurrently) could
// be overwritten.
func accountTrieNodeKey(path []byte) []byte {
	key := make([]byte, 0, len(trieNodeAccountPrefix)+len(path))
	key = append(key, trieNodeAccountPrefix...)
	return append(key, path...)
}
// storageTrieNodeKey = trieNodeStoragePrefix + accountHash + nodePath.
//
// The key is assembled with a single allocation in a freshly allocated,
// exactly sized buffer instead of appending onto trieNodeStoragePrefix itself.
// Appending directly onto the shared package-level slice would write into its
// backing array whenever that slice has spare capacity, so keys handed out
// earlier (or concurrently) could be overwritten.
func storageTrieNodeKey(accountHash common.Hash, path []byte) []byte {
	owner := accountHash.Bytes()
	key := make([]byte, 0, len(trieNodeStoragePrefix)+len(owner)+len(path))
	key = append(key, trieNodeStoragePrefix...)
	key = append(key, owner...)
	return append(key, path...)
}

View file

@ -27,7 +27,7 @@ import (
)
// NewStateSync create a new state trie download scheduler.
func NewStateSync(root common.Hash, database ethdb.KeyValueReader, onLeaf func(keys [][]byte, leaf []byte) error, scheme trie.NodeScheme) *trie.Sync {
func NewStateSync(root common.Hash, database ethdb.KeyValueReader, onLeaf func(keys [][]byte, leaf []byte) error, scheme string) *trie.Sync {
// Register the storage slot callback if the external callback is specified.
var onSlot func(keys [][]byte, path []byte, leaf []byte, parent common.Hash, parentPath []byte) error
if onLeaf != nil {

View file

@ -664,14 +664,14 @@ func TestIncompleteStateSync(t *testing.T) {
for i, path := range addedPaths {
owner, inner := trie.ResolvePath([]byte(path))
hash := addedHashes[i]
val := scheme.ReadTrieNode(dstDb, owner, inner, hash)
val := rawdb.ReadTrieNode(dstDb, owner, inner, hash, scheme)
if val == nil {
t.Error("missing trie node")
}
scheme.DeleteTrieNode(dstDb, owner, inner, hash)
rawdb.DeleteTrieNode(dstDb, owner, inner, hash, scheme)
if err := checkStateConsistency(dstDb, srcRoot); err == nil {
t.Errorf("trie inconsistency not caught, missing: %v", path)
}
scheme.WriteTrieNode(dstDb, owner, inner, hash, val)
rawdb.WriteTrieNode(dstDb, owner, inner, hash, val, scheme)
}
}

View file

@ -299,7 +299,7 @@ func newStateSync(d *Downloader, root common.Hash) *stateSync {
cancel: make(chan struct{}),
done: make(chan struct{}),
started: make(chan struct{}),
sched: state.NewStateSync(root, d.stateDB, nil, d.blockchain.TrieDB().Scheme()),
sched: state.NewStateSync(root, d.stateDB, nil, rawdb.HashScheme),
keccak: sha3.NewLegacyKeccak256(),
trieTasks: make(map[string]*trieTask),
codeTasks: make(map[common.Hash]*codeTask),

View file

@ -422,7 +422,7 @@ func (db *Database) Node(hash common.Hash) ([]byte, error) {
memcacheDirtyMissMeter.Mark(1)
// Content unavailable in memory, attempt to retrieve from disk
enc := rawdb.ReadTrieNode(db.diskdb, hash)
enc := rawdb.ReadLegacyTrieNode(db.diskdb, hash)
if len(enc) != 0 {
if db.cleans != nil {
db.cleans.Set(hash[:], enc)
@ -588,7 +588,7 @@ func (db *Database) Cap(limit common.StorageSize) error {
for size > limit && oldest != (common.Hash{}) {
// Fetch the oldest referenced Node and push into the batch
node := db.dirties[oldest]
rawdb.WriteTrieNode(batch, oldest, node.rlp())
rawdb.WriteLegacyTrieNode(batch, oldest, node.rlp())
// If we exceeded the ideal batch size, commit and reset
if batch.ValueSize() >= ethdb.IdealBatchSize {
@ -720,7 +720,7 @@ func (db *Database) commit(hash common.Hash, batch ethdb.Batch, uncacher *cleane
return err
}
// If we've reached an optimal batch size, commit and start over
rawdb.WriteTrieNode(batch, hash, node.rlp())
rawdb.WriteLegacyTrieNode(batch, hash, node.rlp())
if batch.ValueSize() >= ethdb.IdealBatchSize {
if err := batch.Write(); err != nil {
return err
@ -893,6 +893,6 @@ func (db *Database) CommitPreimages() error {
}
// Scheme returns the node scheme used in the database.
func (db *Database) Scheme() NodeScheme {
return &hashScheme{}
func (db *Database) Scheme() string {
return rawdb.HashScheme
}

View file

@ -1,96 +0,0 @@
// Copyright 2021 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package trie
import (
"github.com/XinFinOrg/XDPoSChain/common"
"github.com/XinFinOrg/XDPoSChain/core/rawdb"
"github.com/XinFinOrg/XDPoSChain/ethdb"
)
// Node scheme identifiers.
const (
HashScheme = "hashScheme" // Identifier of the hash-based node scheme
// The path-based scheme will be introduced in follow-up PRs.
// PathScheme = "pathScheme" // Identifier of the path-based node scheme
)
// NodeScheme describes the scheme used for interacting with trie nodes on
// disk. Every accessor receives the full node description (owner, path and
// hash); which of these an implementation actually uses is up to the scheme.
type NodeScheme interface {
// Name returns the identifier of the node scheme.
Name() string
// HasTrieNode checks the trie node presence with the provided node info and
// the associated node hash.
HasTrieNode(db ethdb.KeyValueReader, owner common.Hash, path []byte, hash common.Hash) bool
// ReadTrieNode retrieves the trie node from the database with the provided
// node info and the associated node hash.
ReadTrieNode(db ethdb.KeyValueReader, owner common.Hash, path []byte, hash common.Hash) []byte
// WriteTrieNode writes the trie node into the database with the provided
// node info and the associated node hash.
WriteTrieNode(db ethdb.KeyValueWriter, owner common.Hash, path []byte, hash common.Hash, node []byte)
// DeleteTrieNode deletes the trie node from the database with the provided
// node info and the associated node hash.
DeleteTrieNode(db ethdb.KeyValueWriter, owner common.Hash, path []byte, hash common.Hash)
// IsTrieNode returns an indicator of whether the given database key is the
// key of a trie node according to the scheme, along with a byte slice
// derived from that key.
IsTrieNode(key []byte) (bool, []byte)
}
// hashScheme is the hash-based node scheme. All of its accessors address trie
// nodes by hash alone; the owner and path arguments are accepted but unused.
type hashScheme struct{}

// Name reports the identifier of the hash-based scheme.
func (s *hashScheme) Name() string {
	return HashScheme
}

// HasTrieNode reports whether a trie node with the given hash is present in
// the database.
func (s *hashScheme) HasTrieNode(db ethdb.KeyValueReader, owner common.Hash, path []byte, hash common.Hash) bool {
	present := rawdb.HasTrieNode(db, hash)
	return present
}

// ReadTrieNode loads the trie node with the given hash from the database.
func (s *hashScheme) ReadTrieNode(db ethdb.KeyValueReader, owner common.Hash, path []byte, hash common.Hash) []byte {
	blob := rawdb.ReadTrieNode(db, hash)
	return blob
}

// WriteTrieNode stores the trie node in the database under the given hash.
func (s *hashScheme) WriteTrieNode(db ethdb.KeyValueWriter, owner common.Hash, path []byte, hash common.Hash, node []byte) {
	rawdb.WriteTrieNode(db, hash, node)
}

// DeleteTrieNode removes the trie node with the given hash from the database.
func (s *hashScheme) DeleteTrieNode(db ethdb.KeyValueWriter, owner common.Hash, path []byte, hash common.Hash) {
	rawdb.DeleteTrieNode(db, hash)
}

// IsTrieNode treats every key of exactly common.HashLength bytes as a trie
// node key and returns it unchanged alongside true; any other key yields
// (false, nil).
func (s *hashScheme) IsTrieNode(key []byte) (bool, []byte) {
	if len(key) != common.HashLength {
		return false, nil
	}
	return true, key
}

View file

@ -155,7 +155,7 @@ func (batch *syncMemBatch) hasCode(hash common.Hash) bool {
// unknown trie hashes to retrieve, accepts Node data associated with said hashes
// and reconstructs the trie step by step until all is done.
type Sync struct {
scheme NodeScheme // Node scheme descriptor used in database.
scheme string // Node scheme descriptor used in database.
database ethdb.KeyValueReader // Persistent database to check for existing entries
membatch *syncMemBatch // Memory buffer to avoid frequent database writes
nodeReqs map[string]*nodeRequest // Pending requests pertaining to a trie node path
@ -165,7 +165,7 @@ type Sync struct {
}
// NewSync creates a new trie data download scheduler.
func NewSync(root common.Hash, database ethdb.KeyValueReader, callback LeafCallback, scheme NodeScheme) *Sync {
func NewSync(root common.Hash, database ethdb.KeyValueReader, callback LeafCallback, scheme string) *Sync {
ts := &Sync{
scheme: scheme,
database: database,
@ -191,7 +191,7 @@ func (s *Sync) AddSubTrie(root common.Hash, path []byte, parent common.Hash, par
return
}
owner, inner := ResolvePath(path)
if s.scheme.HasTrieNode(s.database, owner, inner, root) {
if rawdb.HasTrieNode(s.database, owner, inner, root, s.scheme) {
return
}
// Assemble the new sub-trie sync request
@ -349,7 +349,7 @@ func (s *Sync) Commit(dbw ethdb.Batch) error {
// Dump the membatch into a database dbw
for path, value := range s.membatch.nodes {
owner, inner := ResolvePath([]byte(path))
s.scheme.WriteTrieNode(dbw, owner, inner, s.membatch.hashes[path], value)
rawdb.WriteTrieNode(dbw, owner, inner, s.membatch.hashes[path], value, s.scheme)
}
for hash, value := range s.membatch.codes {
rawdb.WriteCode(dbw, hash, value)
@ -469,7 +469,7 @@ func (s *Sync) children(req *nodeRequest, object node) ([]*nodeRequest, error) {
chash = common.BytesToHash(node)
owner, inner = ResolvePath(child.path)
)
if s.scheme.HasTrieNode(s.database, owner, inner, chash) {
if rawdb.HasTrieNode(s.database, owner, inner, chash, s.scheme) {
return
}
// Locally unknown node, schedule for retrieval

View file

@ -819,7 +819,7 @@ func TestCommitSequenceStackTrie(t *testing.T) {
// Another sponge is used for the stacktrie commits
stackTrieSponge := &spongeDb{sponge: sha3.NewLegacyKeccak256(), id: "b"}
stTrie := NewStackTrie(func(owner common.Hash, path []byte, hash common.Hash, blob []byte) {
db.Scheme().WriteTrieNode(stackTrieSponge, owner, path, hash, blob)
rawdb.WriteTrieNode(stackTrieSponge, owner, path, hash, blob, db.Scheme())
})
// Fill the trie with elements
for i := 0; i < count; i++ {
@ -878,7 +878,7 @@ func TestCommitSequenceSmallRoot(t *testing.T) {
// Another sponge is used for the stacktrie commits
stackTrieSponge := &spongeDb{sponge: sha3.NewLegacyKeccak256(), id: "b"}
stTrie := NewStackTrie(func(owner common.Hash, path []byte, hash common.Hash, blob []byte) {
db.Scheme().WriteTrieNode(stackTrieSponge, owner, path, hash, blob)
rawdb.WriteTrieNode(stackTrieSponge, owner, path, hash, blob, db.Scheme())
})
// Add a single small-element to the trie(s)
key := make([]byte, 5)