core/state: add hash-based filter methods and NewPartialStateSync

Extends ContractFilter interface with hash-based methods (ShouldSyncStorageByHash,
ShouldSyncCodeByHash) for efficient filtering during snap sync when only account
hashes are available.

Adds NewPartialStateSync() function that accepts filter callbacks to control which
accounts have their storage/code synced during healing. This prevents the healing
phase from re-syncing storage for accounts that were intentionally skipped during
initial sync.

Part of partial statefulness Phase 2.
This commit is contained in:
CPerezz 2026-02-02 13:35:22 +01:00
parent cc2b92b6a4
commit a5a5f40aa7
No known key found for this signature in database
GPG key ID: 62045F34B97177DD
3 changed files with 173 additions and 5 deletions

View file

@ -16,7 +16,10 @@
package partial
import "github.com/ethereum/go-ethereum/common"
import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto"
)
// ContractFilter determines which contracts' storage to sync and retain.
// This interface allows flexible filtering strategies for partial statefulness.
@ -32,21 +35,34 @@ type ContractFilter interface {
// IsTracked returns true if this contract's storage is being tracked.
// Used by RPC handlers to determine if storage queries can be answered.
IsTracked(address common.Address) bool
// ShouldSyncStorageByHash returns true if storage should be synced for the
// contract with the given account hash. Used by snap sync which operates on hashes.
ShouldSyncStorageByHash(accountHash common.Hash) bool
// ShouldSyncCodeByHash returns true if bytecode should be synced for the
// contract with the given account hash. Used by snap sync which operates on hashes.
ShouldSyncCodeByHash(accountHash common.Hash) bool
}
// ConfiguredFilter implements ContractFilter based on a configured list of addresses.
// This is the primary implementation used in production.
//
// The two fields are sets keyed differently: one by contract address, one by
// the keccak256 hash of that address. Each kind of query is therefore a
// single map lookup.
type ConfiguredFilter struct {
	contracts      map[common.Address]struct{} // Tracked contracts keyed by address
	contractHashes map[common.Hash]struct{}    // Pre-computed keccak256(address) for snap sync
}
// NewConfiguredFilter creates a new filter from a list of contract addresses.
// It pre-computes keccak256 hashes for efficient filtering during snap sync.
//
// A nil or empty list yields a filter that tracks nothing. Duplicate addresses
// collapse into a single entry in each set.
func NewConfiguredFilter(addresses []common.Address) *ConfiguredFilter {
	byAddr := make(map[common.Address]struct{}, len(addresses))
	byHash := make(map[common.Hash]struct{}, len(addresses))
	for _, addr := range addresses {
		byAddr[addr] = struct{}{}
		// Snap sync uses keccak256(address) as account hash
		byHash[crypto.Keccak256Hash(addr.Bytes())] = struct{}{}
	}
	// Both sets must be handed to the filter: with contractHashes left nil
	// every hash-based lookup would miss and report false.
	return &ConfiguredFilter{contracts: byAddr, contractHashes: byHash}
}
// ShouldSyncStorage returns true if the contract is in the configured list.
@ -67,6 +83,20 @@ func (f *ConfiguredFilter) IsTracked(addr common.Address) bool {
return ok
}
// ShouldSyncStorageByHash reports whether accountHash is present in the set of
// configured contract hashes. It exists for snap sync, which works with
// account hashes rather than addresses.
func (f *ConfiguredFilter) ShouldSyncStorageByHash(accountHash common.Hash) bool {
	_, tracked := f.contractHashes[accountHash]
	return tracked
}
// ShouldSyncCodeByHash reports whether accountHash is present in the set of
// configured contract hashes. It exists for snap sync, which works with
// account hashes rather than addresses.
func (f *ConfiguredFilter) ShouldSyncCodeByHash(accountHash common.Hash) bool {
	_, tracked := f.contractHashes[accountHash]
	return tracked
}
// Contracts returns the list of tracked contract addresses.
func (f *ConfiguredFilter) Contracts() []common.Address {
result := make([]common.Address, 0, len(f.contracts))
@ -94,3 +124,13 @@ func (f *AllowAllFilter) ShouldSyncCode(addr common.Address) bool {
func (f *AllowAllFilter) IsTracked(addr common.Address) bool {
// The addr argument is not inspected: every address is reported as tracked.
return true
}
// ShouldSyncStorageByHash always returns true for full node behavior.
// The accountHash argument is not inspected, so no account's storage is
// ever filtered out.
func (f *AllowAllFilter) ShouldSyncStorageByHash(accountHash common.Hash) bool {
return true
}
// ShouldSyncCodeByHash always returns true for full node behavior.
// The accountHash argument is not inspected, so no account's bytecode is
// ever filtered out.
func (f *AllowAllFilter) ShouldSyncCodeByHash(accountHash common.Hash) bool {
return true
}

View file

@ -0,0 +1,108 @@
// Copyright 2025 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package partial
import (
"testing"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto"
)
func TestConfiguredFilterBasic(t *testing.T) {
// Test empty filter
emptyFilter := NewConfiguredFilter(nil)
addr := common.HexToAddress("0x1234567890123456789012345678901234567890")
if emptyFilter.ShouldSyncStorage(addr) {
t.Error("Empty filter should not allow any storage")
}
if emptyFilter.ShouldSyncCode(addr) {
t.Error("Empty filter should not allow any code")
}
if emptyFilter.IsTracked(addr) {
t.Error("Empty filter should not track any address")
}
// Test filter with addresses
tracked := []common.Address{
common.HexToAddress("0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2"),
common.HexToAddress("0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48"),
}
filter := NewConfiguredFilter(tracked)
// Tracked addresses should pass
for _, addr := range tracked {
if !filter.ShouldSyncStorage(addr) {
t.Errorf("Tracked address %s should allow storage", addr.Hex())
}
}
// Untracked address should not pass
untracked := common.HexToAddress("0x0000000000000000000000000000000000000001")
if filter.ShouldSyncStorage(untracked) {
t.Error("Untracked address should not allow storage")
}
}
// TestConfiguredFilterHashConsistency checks that the hash-based filter
// methods agree with their address-based counterparts, both for an address
// that was configured and for one that was not. The untracked case matters:
// without it a hash method that accepted every hash would still pass.
func TestConfiguredFilterHashConsistency(t *testing.T) {
	tracked := common.HexToAddress("0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2")
	untracked := common.HexToAddress("0x0000000000000000000000000000000000000001")
	filter := NewConfiguredFilter([]common.Address{tracked})

	cases := []struct {
		name string
		addr common.Address
		want bool // expected answer from the hash-based methods
	}{
		{"tracked", tracked, true},
		{"untracked", untracked, false},
	}
	for _, tc := range cases {
		// Snap sync identifies an account by keccak256(address).
		hash := crypto.Keccak256Hash(tc.addr.Bytes())

		// Address-based and hash-based methods should be consistent
		addrStorage := filter.ShouldSyncStorage(tc.addr)
		hashStorage := filter.ShouldSyncStorageByHash(hash)
		if addrStorage != hashStorage {
			t.Errorf("Inconsistent storage filter: addr=%v, hash=%v", addrStorage, hashStorage)
		}
		if hashStorage != tc.want {
			t.Errorf("%s address: ShouldSyncStorageByHash = %v, want %v", tc.name, hashStorage, tc.want)
		}

		addrCode := filter.ShouldSyncCode(tc.addr)
		hashCode := filter.ShouldSyncCodeByHash(hash)
		if addrCode != hashCode {
			t.Errorf("Inconsistent code filter: addr=%v, hash=%v", addrCode, hashCode)
		}
		if hashCode != tc.want {
			t.Errorf("%s address: ShouldSyncCodeByHash = %v, want %v", tc.name, hashCode, tc.want)
		}
	}
}
// TestAllowAllFilterInterface checks that AllowAllFilter can be used as a
// ContractFilter and that every one of its methods answers true.
func TestAllowAllFilterInterface(t *testing.T) {
	// Assigning to the interface type makes the compiler verify that
	// AllowAllFilter implements ContractFilter.
	var filter ContractFilter = &AllowAllFilter{}

	addr := common.HexToAddress("0x1234567890123456789012345678901234567890")
	hash := crypto.Keccak256Hash(addr.Bytes())

	checks := []struct {
		allowed bool
		failure string
	}{
		{filter.ShouldSyncStorage(addr), "AllowAllFilter should allow storage"},
		{filter.ShouldSyncCode(addr), "AllowAllFilter should allow code"},
		{filter.IsTracked(addr), "AllowAllFilter should track all addresses"},
		{filter.ShouldSyncStorageByHash(hash), "AllowAllFilter should allow storage by hash"},
		{filter.ShouldSyncCodeByHash(hash), "AllowAllFilter should allow code by hash"},
	}
	for _, check := range checks {
		if !check.allowed {
			t.Error(check.failure)
		}
	}
}

View file

@ -26,6 +26,15 @@ import (
// NewStateSync creates a new state trie download scheduler.
// It delegates to NewPartialStateSync with both filter callbacks nil, so no
// account has its storage or code filtered out.
func NewStateSync(root common.Hash, database ethdb.KeyValueReader, onLeaf func(keys [][]byte, leaf []byte) error, scheme string) *trie.Sync {
	// The zero value of a func type is nil, which NewPartialStateSync treats
	// as "no filter set".
	var unfiltered func(accountHash common.Hash) bool
	return NewPartialStateSync(root, database, onLeaf, scheme, unfiltered, unfiltered)
}
// NewPartialStateSync creates a state trie download scheduler with optional filtering.
// The shouldSyncStorage callback, if non-nil, is called with the account hash to determine
// whether to sync storage for that account. This enables partial statefulness where only
// selected contracts have their storage synced.
// The shouldSyncCode callback, if non-nil, is called with the account hash to determine
// whether to sync bytecode for that account.
func NewPartialStateSync(root common.Hash, database ethdb.KeyValueReader, onLeaf func(keys [][]byte, leaf []byte) error, scheme string, shouldSyncStorage func(accountHash common.Hash) bool, shouldSyncCode func(accountHash common.Hash) bool) *trie.Sync {
// Register the storage slot callback if the external callback is specified.
var onSlot func(keys [][]byte, path []byte, leaf []byte, parent common.Hash, parentPath []byte) error
if onLeaf != nil {
@ -46,8 +55,19 @@ func NewStateSync(root common.Hash, database ethdb.KeyValueReader, onLeaf func(k
if err := rlp.DecodeBytes(leaf, &obj); err != nil {
return err
}
syncer.AddSubTrie(obj.Root, path, parent, parentPath, onSlot)
syncer.AddCodeEntry(common.BytesToHash(obj.CodeHash), path, parent, parentPath)
// The account hash is the first entry of keys; it stays the zero hash if keys is empty
var accountHash common.Hash
if len(keys) > 0 {
accountHash = common.BytesToHash(keys[0])
}
// Only add storage subtrie if filter allows it (or no filter is set)
if shouldSyncStorage == nil || shouldSyncStorage(accountHash) {
syncer.AddSubTrie(obj.Root, path, parent, parentPath, onSlot)
}
// Only add code entry if filter allows it (or no filter is set)
if shouldSyncCode == nil || shouldSyncCode(accountHash) {
syncer.AddCodeEntry(common.BytesToHash(obj.CodeHash), path, parent, parentPath)
}
return nil
}
syncer = trie.NewSync(root, database, onAccount, scheme)