go-ethereum/triedb/pathdb/reader.go
rjl493456442 7f78fa6912
triedb/pathdb, core: keep root->id mappings after truncation (#32502)
This pull request preserves the root->ID mappings in the path database
even after the associated state histories are truncated, regardless of
whether the truncation occurs at the head or the tail.

The motivation is to support an additional history type, trienode history. 
Since the root->ID mappings are shared between two history instances, 
they must not be removed by either one.

As a consequence, the root->ID mappings remain in the database even
after the corresponding histories are pruned. While these mappings may
become dangling, it is safe and cheap to keep them.

Additionally, this pull request enhances validation during historical
reader construction, ensuring that only canonical historical state will be
served.
2025-08-29 15:43:58 +08:00

320 lines
12 KiB
Go

// Copyright 2024 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package pathdb
import (
"errors"
"fmt"
"time"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/common/hexutil"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/triedb/database"
)
// Labels describing the place a trie node was served from.
const (
	locDiffLayer  = "diff"  // one of the diff layers
	locDiskLayer  = "disk"  // the persistent state
	locCleanCache = "clean" // the clean cache
	locDirtyCache = "dirty" // the dirty cache
)
// nodeLoc records where a trie node was found. It exists mainly to make
// debugging output more informative.
type nodeLoc struct {
	loc   string // label of the location the node was found at
	depth int    // depth reported alongside the location
}

// string renders the node location in a human-readable form.
func (n *nodeLoc) string() string {
	label, depth := n.loc, n.depth
	return fmt.Sprintf("loc: %s, depth: %d", label, depth)
}
// reader implements the database.NodeReader interface, providing the functionalities to
// retrieve trie nodes by wrapping the internal state layer. The same type is also
// handed out by Database.StateReader to serve account and storage data.
type reader struct {
// db is the database the reader belongs to; its layer tree is consulted
// when looking up accounts and storage slots.
db *Database
// state is the state root the reader was opened for.
state common.Hash
// noHashCheck, when set, makes Node skip the comparison between the
// retrieved node hash and the expected one.
noHashCheck bool
// layer is the entry-point layer resolved for the state root.
layer layer
}
// Node implements the database.NodeReader interface. It resolves the trie node
// identified by owner and path through the wrapped layer and, unless hash
// checking is disabled, verifies that the hash of the retrieved node matches
// the expected one.
//
// The returned byte slice is not deep-copied and may still be referenced by
// the database, so it must not be modified.
func (r *reader) Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, error) {
	blob, got, loc, err := r.layer.node(owner, path, 0)
	if err != nil {
		return nil, err
	}
	// Nothing more to do if verification is disabled or the local node is
	// consistent with the target.
	if r.noHashCheck || got == hash {
		return blob, nil
	}
	// The local node is inconsistent with the target. Record the mismatch
	// against the place it was served from; the location is always available
	// even if the node is not found.
	switch loc.loc {
	case locDirtyCache:
		nodeDirtyFalseMeter.Mark(1)
	case locCleanCache:
		nodeCleanFalseMeter.Mark(1)
	case locDiskLayer:
		nodeDiskFalseMeter.Mark(1)
	case locDiffLayer:
		nodeDiffFalseMeter.Mark(1)
	}
	encoded := "nil"
	if len(blob) != 0 {
		encoded = hexutil.Encode(blob)
	}
	log.Error("Unexpected trie node", "location", loc.loc, "owner", owner.Hex(), "path", path, "expect", hash.Hex(), "got", got.Hex(), "blob", encoded)
	return nil, fmt.Errorf("unexpected node: (%x %v), %x!=%x, %s, blob: %s", owner, path, hash, got, loc.string(), encoded)
}
// AccountRLP returns the encoded account stored under the given account hash.
// An error is returned if the read operation exits abnormally, in particular
// when the layer is already stale.
//
// Note:
//   - the returned account data is not a copy, please don't modify it
//   - no error will be returned if the requested account is not found in database
func (r *reader) AccountRLP(hash common.Hash) ([]byte, error) {
	located, err := r.db.tree.lookupAccount(hash, r.state)
	if err != nil {
		return nil, err
	}
	data, err := located.account(hash, 0)
	if !errors.Is(err, errSnapshotStale) {
		return data, err
	}
	// The located layer is stale, so fall back to the slow path. This is an
	// edge case where the located layer is the disk layer (e.g., the requested
	// account was not changed in any of the diff layers) and it became stale
	// within a very short time window.
	//
	// The fallback is essential: the slow traversal starts from the entry
	// point layer and goes down, so the staleness of the disk layer does not
	// affect the result unless the entry point layer is also stale.
	return r.layer.account(hash, 0)
}
// Account returns the account stored under the given account hash, decoded
// from the slim data format. An error is returned if the read operation exits
// abnormally, in particular when the layer is already stale. The function
// panics if the stored data cannot be decoded.
//
// Note:
//   - the returned account object is safe to modify
//   - no error will be returned if the requested account is not found in database
func (r *reader) Account(hash common.Hash) (*types.SlimAccount, error) {
	blob, err := r.AccountRLP(hash)
	switch {
	case err != nil:
		return nil, err
	case len(blob) == 0:
		// The account does not exist.
		return nil, nil
	}
	var account types.SlimAccount
	if err := rlp.DecodeBytes(blob, &account); err != nil {
		panic(err)
	}
	return &account, nil
}
// Storage returns the storage data stored under the given storage hash within
// the account identified by accountHash. An error is returned if the read
// operation exits abnormally, in particular when the layer is already stale.
//
// Note:
//   - the returned storage data is not a copy, please don't modify it
//   - no error will be returned if the requested slot is not found in database
func (r *reader) Storage(accountHash, storageHash common.Hash) ([]byte, error) {
	located, err := r.db.tree.lookupStorage(accountHash, storageHash, r.state)
	if err != nil {
		return nil, err
	}
	data, err := located.storage(accountHash, storageHash, 0)
	if !errors.Is(err, errSnapshotStale) {
		return data, err
	}
	// The located layer is stale, so fall back to the slow path. This is an
	// edge case where the located layer is the disk layer (e.g., the requested
	// slot was not changed in any of the diff layers) and it became stale
	// within a very short time window.
	//
	// The fallback is essential: the slow traversal starts from the entry
	// point layer and goes down, so the staleness of the disk layer does not
	// affect the result unless the entry point layer is also stale.
	return r.layer.storage(accountHash, storageHash, 0)
}
// NodeReader returns a trie node reader backed by the layer that belongs to
// the given state root. An error is returned if no such layer is available.
func (db *Database) NodeReader(root common.Hash) (database.NodeReader, error) {
	entry := db.tree.get(root)
	if entry == nil {
		return nil, fmt.Errorf("state %#x is not available", root)
	}
	nr := &reader{
		layer:       entry,
		state:       root,
		db:          db,
		noHashCheck: db.isVerkle, // node hashes are not checked for verkle
	}
	return nr, nil
}
// StateReader returns a reader giving access to the state data associated
// with the specified state root. An error is returned if no layer is
// available for that root.
func (db *Database) StateReader(root common.Hash) (database.StateReader, error) {
	entry := db.tree.get(root)
	if entry == nil {
		return nil, fmt.Errorf("state %#x is not available", root)
	}
	sr := &reader{
		layer: entry,
		state: root,
		db:    db,
	}
	return sr, nil
}
// HistoricalStateReader is a wrapper over history reader, providing access to
// historical state.
type HistoricalStateReader struct {
// db is the database the reader was created from; its bottom-most layer
// is consulted on every read.
db *Database
// reader is the underlying history reader performing the lookups.
reader *historyReader
// id is the state ID resolved for the requested historical state root.
id uint64
}
// HistoricReader constructs a reader for accessing the requested historic state.
//
// An error is returned if the state indexer or state freezer is missing, if
// the state histories haven't been fully indexed yet, if no state ID is known
// for the given root, if the associated state history metadata cannot be read
// (e.g., it has been pruned), or if the requested state is not canonical.
func (db *Database) HistoricReader(root common.Hash) (*HistoricalStateReader, error) {
	// Bail out if the state indexer or the state freezer is missing.
	if db.stateIndexer == nil || db.stateFreezer == nil {
		return nil, fmt.Errorf("historical state %#x is not available", root)
	}
	// Bail out if the state history hasn't been fully indexed.
	if !db.stateIndexer.inited() {
		return nil, errors.New("state histories haven't been fully indexed yet")
	}
	// - States at the current disk layer or above are directly accessible
	//   via `db.StateReader`.
	//
	// - States older than the current disk layer (including the disk layer
	//   itself) are available via `db.HistoricReader`.
	id := rawdb.ReadStateID(db.diskdb, root)
	if id == nil {
		return nil, fmt.Errorf("state %#x is not available", root)
	}
	// Ensure the requested state is canonical, historical states on side chain
	// are not accessible. The state history with ID id+1 is expected to name
	// the requested state as its parent.
	meta, err := readStateHistoryMeta(db.stateFreezer, *id+1)
	if err != nil {
		return nil, err // e.g., the referred state history has been pruned
	}
	if meta.parent != root {
		return nil, fmt.Errorf("state %#x is not canonical", root)
	}
	return &HistoricalStateReader{
		id:     *id,
		db:     db,
		reader: newHistoryReader(db.diskdb, db.stateFreezer),
	}, nil
}
// AccountRLP returns the account RLP associated with the given address in the
// slim data format, as of the historical state served by this reader. An error
// is returned if the read operation exits abnormally, in particular when the
// layer is already stale.
//
// Note:
//   - the returned account is not a copy, please don't modify it.
//   - no error will be returned if the requested account is not found in database.
func (r *HistoricalStateReader) AccountRLP(address common.Address) ([]byte, error) {
	start := time.Now()
	defer func() {
		historicalAccountReadTimer.UpdateSince(start)
	}()
	addrHash := crypto.Keccak256Hash(address.Bytes())

	// TODO(rjl493456442): Theoretically, the obtained disk layer could become stale
	// within a very short time window.
	//
	// Reading the account data while holding `db.tree.lock` would resolve this
	// issue, but it would introduce heavy contention over the lock.
	//
	// Let's optimistically assume the situation is very unlikely to happen,
	// and try to define a low granularity lock if the current approach doesn't
	// work later.
	disk := r.db.tree.bottom()
	latest, err := disk.account(addrHash, 0)
	if err != nil {
		return nil, err
	}
	query := newAccountIdentQuery(address, addrHash)
	return r.reader.read(query, r.id, disk.stateID(), latest)
}
// Account returns the account associated with the given address, decoded from
// the slim data format, as of the historical state served by this reader. An
// error is returned if the read operation exits abnormally, in particular when
// the layer is already stale. The function panics if the retrieved data cannot
// be decoded.
//
// No error will be returned if the requested account is not found in database.
func (r *HistoricalStateReader) Account(address common.Address) (*types.SlimAccount, error) {
	data, err := r.AccountRLP(address)
	if err != nil {
		return nil, err
	}
	// An empty blob means the account does not exist.
	if len(data) == 0 {
		return nil, nil
	}
	var decoded types.SlimAccount
	if decErr := rlp.DecodeBytes(data, &decoded); decErr != nil {
		panic(decErr)
	}
	return &decoded, nil
}
// Storage returns the storage data associated with the given key within the
// account identified by address, as of the historical state served by this
// reader. An error is returned if the read operation exits abnormally, in
// particular when the layer is already stale.
//
// Note:
//   - the returned storage data is not a copy, please don't modify it.
//   - no error will be returned if the requested slot is not found in database.
func (r *HistoricalStateReader) Storage(address common.Address, key common.Hash) ([]byte, error) {
	start := time.Now()
	defer func() {
		historicalStorageReadTimer.UpdateSince(start)
	}()
	var (
		addrHash = crypto.Keccak256Hash(address.Bytes())
		keyHash  = crypto.Keccak256Hash(key.Bytes())
	)
	// TODO(rjl493456442): Theoretically, the obtained disk layer could become stale
	// within a very short time window.
	//
	// Reading the storage data while holding `db.tree.lock` would resolve this
	// issue, but it would introduce heavy contention over the lock.
	//
	// Let's optimistically assume the situation is very unlikely to happen,
	// and try to define a low granularity lock if the current approach doesn't
	// work later.
	disk := r.db.tree.bottom()
	latest, err := disk.storage(addrHash, keyHash, 0)
	if err != nil {
		return nil, err
	}
	query := newStorageIdentQuery(address, addrHash, key, keyHash)
	return r.reader.read(query, r.id, disk.stateID(), latest)
}